Initial open source release of OpenCL 1.2 CTS.

2026-03-24 07:59:01 +00:00 · 2017-05-16 19:04:36 +05:30
parent 6911ba5116
commit f74871b7a3
563 changed files with 202074 additions and 0 deletions
--- a/test_conformance/images/kernel_read_write/CMakeLists.txt
+++ b/test_conformance/images/kernel_read_write/CMakeLists.txt
@@ -0,0 +1,39 @@
+# Image read/write conformance test ("image streams").
+#
+# Sources that are forced to compile as C++ (including the .c harness files).
+# The list mirrors SRCS in this directory's Makefile: the 1D, 1D-array and
+# 2D-array read/write sources were missing here although main.cpp runs those
+# image types. msvc9.c is kept from the original CMake list.
+# NOTE(review): confirm the added test_*_1D*/2D_array files are present in
+# this directory, as the Makefile implies.
+set(IMAGE_STREAMS_CXX_SOURCES
+        main.cpp
+        test_iterations.cpp
+        ../image_helpers.cpp
+        test_loops.cpp
+        test_write_image.cpp
+        test_read_1D.cpp
+        test_read_3D.cpp
+        test_read_1D_array.cpp
+        test_read_2D_array.cpp
+        test_write_1D.cpp
+        test_write_3D.cpp
+        test_write_1D_array.cpp
+        test_write_2D_array.cpp
+        ../../../test_common/harness/errorHelpers.c
+        ../../../test_common/harness/threadTesting.c
+        ../../../test_common/harness/kernelHelpers.c
+        ../../../test_common/harness/imageHelpers.cpp
+        ../../../test_common/harness/conversions.c
+        ../../../test_common/harness/testHarness.c
+        ../../../test_common/harness/typeWrappers.cpp
+        ../../../test_common/harness/msvc9.c
+)
+
+# mt19937.c is part of the executable but, as before, is not in the
+# LANGUAGE CXX list, so it keeps its default language.
+add_executable(conformance_test_image_streams
+        ${IMAGE_STREAMS_CXX_SOURCES}
+        ../../../test_common/harness/mt19937.c
+)
+
+set_source_files_properties(
+        ${IMAGE_STREAMS_CXX_SOURCES}
+        PROPERTIES LANGUAGE CXX)
+
+target_link_libraries(conformance_test_image_streams
+        ${CLConform_LIBRARIES})
--- a/test_conformance/images/kernel_read_write/Jamfile
+++ b/test_conformance/images/kernel_read_write/Jamfile
@@ -0,0 +1,20 @@
+project
+    : requirements
+#      <toolset>gcc:<cflags>-xc++
+#      <toolset>msvc:<cflags>"/TP"
+    ;
+
+# Image read/write conformance test. The local source list matches SRCS in
+# this directory's Makefile; the 1D, 1D-array, 2D-array and 3D-write sources
+# were previously missing from this target.
+# NOTE(review): confirm the added files are present in this directory.
+exe test_image_streams
+    : main.cpp
+      test_iterations.cpp
+      test_loops.cpp
+      test_read_1D.cpp
+      test_read_3D.cpp
+      test_read_1D_array.cpp
+      test_read_2D_array.cpp
+      test_write_image.cpp
+      test_write_1D.cpp
+      test_write_3D.cpp
+      test_write_1D_array.cpp
+      test_write_2D_array.cpp
+      /images//image_helpers
+    ;
+
+# Copy the built executable into the per-variant distribution tree.
+install dist
+    : test_image_streams
+    : <variant>debug:<location>$(DIST)/debug/tests/test_conformance/images/kernel_read_write
+      <variant>release:<location>$(DIST)/release/tests/test_conformance/images/kernel_read_write
+    ;
--- a/test_conformance/images/kernel_read_write/Makefile
+++ b/test_conformance/images/kernel_read_write/Makefile
@@ -0,0 +1,57 @@
+# Builds test_image_streams against the OpenCL framework.
+# Define BUILD_WITH_ATF to compile with -DUSE_ATF and link the ATF framework.
+ifdef BUILD_WITH_ATF
+ATF = -framework ATF
+USE_ATF = -DUSE_ATF
+endif
+
+SRCS = main.cpp \
+	test_iterations.cpp \
+	../image_helpers.cpp \
+	test_loops.cpp \
+	test_write_image.cpp \
+	test_read_1D.cpp \
+	test_read_3D.cpp \
+	test_read_1D_array.cpp \
+	test_read_2D_array.cpp \
+	test_write_1D.cpp \
+	test_write_3D.cpp \
+	test_write_1D_array.cpp \
+	test_write_2D_array.cpp \
+	../../../test_common/harness/errorHelpers.c \
+	../../../test_common/harness/threadTesting.c \
+	../../../test_common/harness/kernelHelpers.c \
+	../../../test_common/harness/imageHelpers.cpp \
+	../../../test_common/harness/conversions.c \
+	../../../test_common/harness/testHarness.c \
+	../../../test_common/harness/mt19937.c \
+	../../../test_common/harness/typeWrappers.cpp
+
+# Each entry is passed to the compiler as -D<entry>.
+DEFINES = DONT_TEST_GARBAGE_POINTERS
+
+SOURCES = $(abspath $(SRCS))
+LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries
+LIBPATH += -L.
+FRAMEWORK =
+HEADERS = 
+TARGET = test_image_streams
+# NOTE(review): SRCS reaches the harness through ../../../test_common/harness,
+# but this include path goes up only two levels - confirm which is intended.
+INCLUDE = -I../../test_common/harness
+COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32 -Os
+# Both C and C++ sources are compiled with the C++ driver.
+CC = c++
+CXX = c++
+CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
+CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
+LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF}
+
+# Map every .c and .cpp source to its .o object file.
+OBJECTS := ${SOURCES:.c=.o}
+OBJECTS := ${OBJECTS:.cpp=.o}
+
+TARGETOBJECT =
+
+# all and clean name actions, not files; without .PHONY a file with either
+# name in this directory would keep the rule from running.
+.PHONY: all clean
+
+all: $(TARGET)
+
+$(TARGET): $(OBJECTS)
+	$(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES)
+
+clean:
+	rm -f $(TARGET) $(OBJECTS)
+
+# Fallback for any target that has no rule.
+.DEFAULT:
+	@echo The target \"$@\" does not exist in Makefile.
--- a/test_conformance/images/kernel_read_write/main.cpp
+++ b/test_conformance/images/kernel_read_write/main.cpp
@@ -0,0 +1,438 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include <stdio.h>
+#include <stdlib.h>
+
+#if !defined(_WIN32)
+#include <stdbool.h>
+#endif
+
+#include <math.h>
+#include <string.h>
+
+#if !defined(_WIN32)
+#include <unistd.h>
+#include <sys/time.h>
+#endif
+
+#include "../testBase.h"
+#include "../../../test_common/harness/fpcontrol.h"
+
+#if defined(__PPC__)
+// Global variable used to hold the FPU control register state. The FPSCR register cannot
+// be used because not all Power implementations retain or observe the NI (non-IEEE
+// mode) bit.
+__thread fpu_control_t fpu_control = 0;
+#endif
+
+// Test switches. All start out false and are set by the matching command-line
+// argument in main().
+bool gDebugTrace = false;        // "debug_trace"
+bool gExtraValidateInfo = false; // "extra_validate"
+bool gDisableOffsets = false;    // "no_offsets"
+bool gTestSmallImages = false;   // "small_images"
+bool gTestMaxImages = false;     // "max_images"
+bool gTestRounding = false;      // "rounding"
+
+// Sampler filter restriction; (cl_filter_mode)-1 until a filter argument is given.
+cl_filter_mode gFilterModeToUse = (cl_filter_mode)-1;
+
+// Default is CL_MEM_USE_HOST_PTR for the test
+cl_mem_flags gMemFlagsToUse = CL_MEM_USE_HOST_PTR;
+
+// Set by "local_samplers".
+bool gUseKernelSamplers = false;
+
+// Bit mask built from the "int" / "uint" / "float" arguments; main() replaces
+// 0 with kTestAllTypes.
+int gTypesToTest = 0;
+
+// Addressing-mode restriction; (cl_addressing_mode)-1 until an address argument is given.
+cl_addressing_mode gAddressModeToUse = (cl_addressing_mode)-1;
+
+// Starts at 7; "NORMALIZED" / "UNNORMALIZED" overwrite it with true / false.
+int gNormalizedModeToUse = 7;
+
+// Channel type / order restrictions; -1 until a matching CL_ constant name is given.
+cl_channel_type gChannelTypeToUse = (cl_channel_type)-1;
+cl_channel_order gChannelOrderToUse = (cl_channel_order)-1;
+
+// Set by "use_pitches".
+bool gEnablePitch = false;
+
+// Device type requested for the run; main() later overwrites it with the
+// type reported by the selected device.
+cl_device_type gDeviceType = CL_DEVICE_TYPE_DEFAULT;
+
+// Command queue and context created in main().
+cl_command_queue queue;
+cl_context context;
+
+#define MAX_ALLOWED_STD_DEVIATION_IN_MB		8.0
+
+// Prints the command-line help text through log_info.
+//
+// execName is normally argv[0]; everything up to and including the last '/'
+// is dropped before the name is shown in the usage line. The text also lists
+// the arguments main() accepts that were previously undocumented: the device
+// type selectors, CL_ADDRESS_NONE, randomize and help.
+void printUsage( const char *execName )
+{
+    const char *p = strrchr( execName, '/' );
+    if( p != NULL )
+        execName = p + 1;
+
+    log_info( "Usage: %s [read] [write] [CL_FILTER_LINEAR|CL_FILTER_NEAREST] [no_offsets] [debug_trace] [small_images]\n", execName );
+    log_info( "Where:\n" );
+    log_info( "\n" );
+    log_info( "\tThe following flags specify what kinds of operations to test. They can be combined; if none are specified, all are tested:\n" );
+    log_info( "\t\tread - Tests reading from an image\n" );
+    log_info( "\t\twrite - Tests writing to an image (can be specified with read to run both; default is both)\n" );
+    log_info( "\n" );
+    log_info( "\tThe following flags specify the types to test. They can be combined; if none are specified, all are tested:\n" );
+    log_info( "\t\tint - Test integer I/O (read_imagei, write_imagei)\n" );
+    log_info( "\t\tuint - Test unsigned integer I/O (read_imageui, write_imageui)\n" );
+    log_info( "\t\tfloat - Test float I/O (read_imagef, write_imagef)\n" );
+    log_info( "\n" );
+    log_info( "\tCL_FILTER_LINEAR - Only tests formats with CL_FILTER_LINEAR filtering\n" );
+    log_info( "\tCL_FILTER_NEAREST - Only tests formats with CL_FILTER_NEAREST filtering\n" );
+    log_info( "\n" );
+    log_info( "\tNORMALIZED - Only tests formats with NORMALIZED coordinates\n" );
+    log_info( "\tUNNORMALIZED - Only tests formats with UNNORMALIZED coordinates\n" );
+    log_info( "\n" );
+    log_info( "\tCL_ADDRESS_NONE - Only tests formats with CL_ADDRESS_NONE addressing\n" );
+    log_info( "\tCL_ADDRESS_CLAMP - Only tests formats with CL_ADDRESS_CLAMP addressing\n" );
+    log_info( "\tCL_ADDRESS_CLAMP_TO_EDGE - Only tests formats with CL_ADDRESS_CLAMP_TO_EDGE addressing\n" );
+    log_info( "\tCL_ADDRESS_REPEAT - Only tests formats with CL_ADDRESS_REPEAT addressing\n" );
+    log_info( "\tCL_ADDRESS_MIRRORED_REPEAT - Only tests formats with CL_ADDRESS_MIRRORED_REPEAT addressing\n" );
+    log_info( "\n" );
+    log_info( "You may also use appropriate CL_ channel type and ordering constants.\n" );
+    log_info( "\n" );
+    log_info( "\t1D - Only test 1D images\n" );
+    log_info( "\t2D - Only test 2D images\n" );
+    log_info( "\t3D - Only test 3D images\n" );
+    log_info( "\t1Darray - Only test 1D image arrays\n" );
+    log_info( "\t2Darray - Only test 2D image arrays\n" );
+    log_info( "\n" );
+    log_info( "\tlocal_samplers - Use samplers declared in the kernel functions instead of passed in as arguments\n" );
+    log_info( "\n" );
+    log_info( "\tThe following specify to use the specific flag to allocate images to use in the tests:\n" );
+    log_info( "\t\tCL_MEM_COPY_HOST_PTR\n" );
+    log_info( "\t\tCL_MEM_USE_HOST_PTR (default)\n" );
+    log_info( "\t\tCL_MEM_ALLOC_HOST_PTR\n" );
+    log_info( "\t\tNO_HOST_PTR - Specifies to use none of the above flags\n" );
+    log_info( "\n" );
+    log_info( "\tThe following select the device type to run on:\n" );
+    log_info( "\t\tcpu (or CL_DEVICE_TYPE_CPU)\n" );
+    log_info( "\t\tgpu (or CL_DEVICE_TYPE_GPU)\n" );
+    log_info( "\t\taccelerator (or CL_DEVICE_TYPE_ACCELERATOR)\n" );
+    log_info( "\t\tCL_DEVICE_TYPE_DEFAULT\n" );
+    log_info( "\n" );
+    log_info( "\tThe following modify the types of images tested:\n" );
+    log_info( "\t\tsmall_images - Runs every format through a loop of widths 1-13 and heights 1-9, instead of random sizes\n" );
+    log_info( "\t\tmax_images - Runs every format through a set of size combinations with the max values, max values - 1, and max values / 128\n" );
+    log_info( "\t\trounding - Runs every format through a single image filled with every possible value for that image format, to verify rounding works properly\n" );
+    log_info( "\n" );
+    log_info( "\tno_offsets - Disables offsets when testing reads (can be good for diagnosing address repeating/clamping problems)\n" );
+    log_info( "\tdebug_trace - Enables additional debug info logging\n" );
+    log_info( "\textra_validate - Enables additional validation failure debug information\n" );
+    log_info( "\tuse_pitches - Enables row and slice pitches\n" );
+    log_info( "\trandomize - Uses a clock-derived random seed and logs it\n" );
+    log_info( "\thelp (or ?) - Prints this message and exits\n" );
+}
+
+
+
+// Bit flags selecting which kinds of image operations main() runs.
+enum TestTypes
+{
+    kReadTests  = 0x1,                       // "read" argument
+    kWriteTests = 0x2,                       // "write" argument
+    kAllTests   = kReadTests | kWriteTests   // used when neither is given
+};
+
+extern int test_image_set( cl_device_id device, test_format_set_fn formatTestFn, cl_mem_object_type imageType );
+
+// Test driver entry point.
+//
+// Parses the command-line arguments into the global test settings, selects a
+// platform and device, creates the shared context and command queue, then
+// runs the read and/or write image tests for every selected image type.
+//
+// Returns -1 on an argument or setup error (and after printing help), 0 when
+// the device reports no image support, gTestFailure when that is positive,
+// and otherwise the sum of the test_image_set() results.
+int main(int argc, const char *argv[])
+{
+    cl_platform_id   platform;
+    cl_device_id     device;
+    cl_channel_type  chanType;
+    cl_channel_order chanOrder;
+    char             str[ 128 ];
+    int              testTypesToRun = 0;
+    int              testMethods = 0;
+    bool             randomize = false;
+
+    test_start();
+
+    //Check CL_DEVICE_TYPE environment variable
+    checkDeviceTypeOverride( &gDeviceType );
+
+    // Parse arguments
+    for( int i = 1; i < argc; i++ )
+    {
+        // strncpy does not terminate the destination when the source fills it,
+        // and str starts out uninitialized, so terminate explicitly.
+        strncpy( str, argv[ i ], sizeof( str ) - 1 );
+        str[ sizeof( str ) - 1 ] = '\0';
+
+        if( strcmp( str, "cpu" ) == 0 || strcmp( str, "CL_DEVICE_TYPE_CPU" ) == 0 )
+            gDeviceType = CL_DEVICE_TYPE_CPU;
+        else if( strcmp( str, "gpu" ) == 0 || strcmp( str, "CL_DEVICE_TYPE_GPU" ) == 0 )
+            gDeviceType = CL_DEVICE_TYPE_GPU;
+        else if( strcmp( str, "accelerator" ) == 0 || strcmp( str, "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 )
+            gDeviceType = CL_DEVICE_TYPE_ACCELERATOR;
+        else if( strcmp( str, "CL_DEVICE_TYPE_DEFAULT" ) == 0 )
+            gDeviceType = CL_DEVICE_TYPE_DEFAULT;
+
+        else if( strcmp( str, "debug_trace" ) == 0 )
+            gDebugTrace = true;
+
+        else if( strcmp( str, "CL_FILTER_NEAREST" ) == 0 || strcmp( str, "NEAREST" ) == 0 )
+            gFilterModeToUse = CL_FILTER_NEAREST;
+        else if( strcmp( str, "CL_FILTER_LINEAR" ) == 0 || strcmp( str, "LINEAR" ) == 0 )
+            gFilterModeToUse = CL_FILTER_LINEAR;
+
+        else if( strcmp( str, "CL_ADDRESS_NONE" ) == 0 )
+            gAddressModeToUse = CL_ADDRESS_NONE;
+        else if( strcmp( str, "CL_ADDRESS_CLAMP" ) == 0 )
+            gAddressModeToUse = CL_ADDRESS_CLAMP;
+        else if( strcmp( str, "CL_ADDRESS_CLAMP_TO_EDGE" ) == 0 )
+            gAddressModeToUse = CL_ADDRESS_CLAMP_TO_EDGE;
+        else if( strcmp( str, "CL_ADDRESS_REPEAT" ) == 0 )
+            gAddressModeToUse = CL_ADDRESS_REPEAT;
+        else if( strcmp( str, "CL_ADDRESS_MIRRORED_REPEAT" ) == 0 )
+            gAddressModeToUse = CL_ADDRESS_MIRRORED_REPEAT;
+
+        else if( strcmp( str, "NORMALIZED" ) == 0 )
+            gNormalizedModeToUse = true;
+        else if( strcmp( str, "UNNORMALIZED" ) == 0 )
+            gNormalizedModeToUse = false;
+
+        else if( strcmp( str, "no_offsets" ) == 0 )
+            gDisableOffsets = true;
+        else if( strcmp( str, "small_images" ) == 0 )
+            gTestSmallImages = true;
+        else if( strcmp( str, "max_images" ) == 0 )
+            gTestMaxImages = true;
+        else if( strcmp( str, "use_pitches" ) == 0 )
+            gEnablePitch = true;
+        else if( strcmp( str, "rounding" ) == 0 )
+            gTestRounding = true;
+        else if( strcmp( str, "extra_validate" ) == 0 )
+            gExtraValidateInfo = true;
+
+        else if( strcmp( str, "read" ) == 0 )
+            testTypesToRun |= kReadTests;
+        else if( strcmp( str, "write" ) == 0 )
+            testTypesToRun |= kWriteTests;
+
+        else if( strcmp( str, "local_samplers" ) == 0 )
+            gUseKernelSamplers = true;
+
+        else if( strcmp( str, "int" ) == 0 )
+            gTypesToTest |= kTestInt;
+        else if( strcmp( str, "uint" ) == 0 )
+            gTypesToTest |= kTestUInt;
+        else if( strcmp( str, "float" ) == 0 )
+            gTypesToTest |= kTestFloat;
+
+        else if( strcmp( str, "randomize" ) == 0 )
+            randomize = true;
+
+        else if( strcmp( str, "1D" ) == 0 )
+            testMethods |= k1D;
+        else if( strcmp( str, "2D" ) == 0 )
+            testMethods |= k2D;
+        else if( strcmp( str, "3D" ) == 0 )
+            testMethods |= k3D;
+        else if( strcmp( str, "1Darray" ) == 0 )
+            testMethods |= k1DArray;
+        else if( strcmp( str, "2Darray" ) == 0 )
+            testMethods |= k2DArray;
+
+        else if( strcmp( str, "CL_MEM_COPY_HOST_PTR" ) == 0 || strcmp( str, "COPY_HOST_PTR" ) == 0 )
+            gMemFlagsToUse = CL_MEM_COPY_HOST_PTR;
+        else if( strcmp( str, "CL_MEM_USE_HOST_PTR" ) == 0 || strcmp( str, "USE_HOST_PTR" ) == 0 )
+            gMemFlagsToUse = CL_MEM_USE_HOST_PTR;
+        else if( strcmp( str, "CL_MEM_ALLOC_HOST_PTR" ) == 0 || strcmp( str, "ALLOC_HOST_PTR" ) == 0 )
+            gMemFlagsToUse = CL_MEM_ALLOC_HOST_PTR;
+        else if( strcmp( str, "NO_HOST_PTR" ) == 0 )
+            gMemFlagsToUse = 0;
+
+        else if( strcmp( str, "help" ) == 0 || strcmp( str, "?" ) == 0 )
+        {
+            printUsage( argv[ 0 ] );
+            // Pair the test_start() above, as every other exit path does.
+            test_finish();
+            return -1;
+        }
+
+        // Anything else must be the name of a channel type or channel order.
+        else if( ( chanType = get_channel_type_from_name( str ) ) != (cl_channel_type)-1 )
+            gChannelTypeToUse = chanType;
+
+        else if( ( chanOrder = get_channel_order_from_name( str ) ) != (cl_channel_order)-1 )
+            gChannelOrderToUse = chanOrder;
+        else
+        {
+            log_error( "ERROR: Unknown argument %d: %s.  Exiting....\n", i, str );
+            test_finish();
+            return -1;
+        }
+    }
+
+    // An empty selection means "everything".
+    if( testMethods == 0 )
+        testMethods = k1D | k2D | k3D | k1DArray | k2DArray;
+    if( testTypesToRun == 0 )
+        testTypesToRun = kAllTests;
+    if( gTypesToTest == 0 )
+        gTypesToTest = kTestAllTypes;
+
+#if defined( __APPLE__ )
+#if defined( __i386__ ) || defined( __x86_64__ )
+#define	kHasSSE3                0x00000008
+#define kHasSupplementalSSE3	0x00000100
+#define	kHasSSE4_1              0x00000400
+#define	kHasSSE4_2              0x00000800
+    /* check our environment for a hint to disable SSE variants */
+    {
+        const char *env = getenv( "CL_MAX_SSE" );
+        if( env )
+        {
+            extern int _cpu_capabilities;
+            int mask = 0;
+            // Each level clears the capability bits of every newer SSE variant.
+            if( 0 == strcmp( env, "SSE4.1" ) )
+                mask = kHasSSE4_2;
+            else if( 0 == strcmp( env, "SSSE3" ) )
+                mask = kHasSSE4_2 | kHasSSE4_1;
+            else if( 0 == strcmp( env, "SSE3" ) )
+                mask = kHasSSE4_2 | kHasSSE4_1 | kHasSupplementalSSE3;
+            else if( 0 == strcmp( env, "SSE2" ) )
+                mask = kHasSSE4_2 | kHasSSE4_1 | kHasSupplementalSSE3 | kHasSSE3;
+
+            log_info( "*** Environment: CL_MAX_SSE = %s ***\n", env );
+            _cpu_capabilities &= ~mask;
+        }
+    }
+#endif
+#endif
+
+    // Seed the random # generators
+    if( randomize )
+    {
+        gRandomSeed = (unsigned) (((int64_t) clock() * 1103515245 + 12345) >> 8);
+        gReSeed = 1;
+        log_info( "Random seed: %d\n", gRandomSeed );
+    }
+
+    int error;
+    // Get our platform
+    error = clGetPlatformIDs(1, &platform, NULL);
+    if( error )
+    {
+        print_error( error, "Unable to get platform" );
+        test_finish();
+        return -1;
+    }
+
+    // Get our device
+    error = clGetDeviceIDs(platform,  gDeviceType, 1, &device, NULL );
+    if( error )
+    {
+        print_error( error, "Unable to get specified device" );
+        test_finish();
+        return -1;
+    }
+
+    // Get the device type so we know if it is a GPU even if default is passed in.
+    error = clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof(gDeviceType), &gDeviceType, NULL);
+    if( error )
+    {
+        print_error( error, "Unable to get device type" );
+        test_finish();
+        return -1;
+    }
+
+    if( printDeviceHeader( device ) != CL_SUCCESS )
+    {
+        test_finish();
+        return -1;
+    }
+
+    // Check for image support
+    if( checkForImageSupport( device ) == CL_IMAGE_FORMAT_NOT_SUPPORTED )
+    {
+        log_info("Device does not support images. Skipping test.\n");
+        test_finish();
+        return 0;
+    }
+
+    // Create a context to test with
+    context = clCreateContext( NULL, 1, &device, notify_callback, NULL, &error );
+    if( error != CL_SUCCESS )
+    {
+        print_error( error, "Unable to create testing context" );
+        test_finish();
+        return -1;
+    }
+
+    // Create a queue against the context
+    queue = clCreateCommandQueue( context, device, 0, &error );
+    if( error != CL_SUCCESS )
+    {
+        print_error( error, "Unable to create testing command queue" );
+        // The context was created successfully above; do not leak it.
+        clReleaseContext( context );
+        test_finish();
+        return -1;
+    }
+
+    if( gTestSmallImages )
+        log_info( "Note: Using small test images\n" );
+
+    // On most platforms which support denorm, default is FTZ off. However,
+    // on some hardware where the reference is computed, default might be flush denorms to zero e.g. arm.
+    // This creates issues in result verification. Since spec allows the implementation to either flush or
+    // not flush denorms to zero, an implementation may choose not to flush i.e. return denorm result whereas
+    // reference result may be zero (flushed denorm). Hence we need to disable denorm flushing on host side
+    // where reference is being computed to make sure we get non-flushed reference result. If implementation
+    // returns flushed result, we correctly take care of that in verification code.
+
+    FPU_mode_type oldMode;
+    DisableFTZ(&oldMode);
+
+    // Run the test now: for each selected image type, reads first, then writes.
+    static const struct
+    {
+        int                methodFlag;
+        cl_mem_object_type imageType;
+    } imageKinds[] = {
+        { k1D,      CL_MEM_OBJECT_IMAGE1D },
+        { k2D,      CL_MEM_OBJECT_IMAGE2D },
+        { k3D,      CL_MEM_OBJECT_IMAGE3D },
+        { k1DArray, CL_MEM_OBJECT_IMAGE1D_ARRAY },
+        { k2DArray, CL_MEM_OBJECT_IMAGE2D_ARRAY },
+    };
+
+    int ret = 0;
+    for( size_t kind = 0; kind < sizeof( imageKinds ) / sizeof( imageKinds[ 0 ] ); kind++ )
+    {
+        if( !( testMethods & imageKinds[ kind ].methodFlag ) )
+            continue;
+
+        if( testTypesToRun & kReadTests )
+            ret += test_image_set( device, test_read_image_formats, imageKinds[ kind ].imageType );
+        if( testTypesToRun & kWriteTests )
+            ret += test_image_set( device, test_write_image_formats, imageKinds[ kind ].imageType );
+    }
+
+    // Restore FP state before leaving
+    RestoreFPState(&oldMode);
+
+    error = clFinish(queue);
+    if (error)
+        print_error(error, "clFinish failed.");
+
+    // Release in reverse order of creation.
+    clReleaseCommandQueue(queue);
+    clReleaseContext(context);
+
+    if (gTestFailure == 0) {
+        if (gTestCount > 1)
+            log_info("PASSED %d of %d tests.\n", gTestCount, gTestCount);
+        else
+            log_info("PASSED test.\n");
+    } else if (gTestFailure > 0) {
+        if (gTestCount > 1)
+            log_error("FAILED %d of %d tests.\n", gTestFailure, gTestCount);
+        else
+            log_error("FAILED test.\n");
+    }
+
+    // Clean up
+    test_finish();
+
+    if (gTestFailure > 0)
+        return gTestFailure;
+
+    return ret;
+}
+
+
--- a/test_conformance/images/kernel_read_write/test_iterations.cpp
+++ b/test_conformance/images/kernel_read_write/test_iterations.cpp
@@ -0,0 +1,946 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../testBase.h"
+#include <float.h>
+
+#if defined( __APPLE__ )
+    #include <signal.h>
+    #include <sys/signal.h>
+    #include <setjmp.h>
+#endif
+
+// Error tolerances. They are not referenced in the visible part of this file;
+// presumably used by the result comparison further down - TODO confirm.
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+
+// Shared test state and settings defined in other translation units.
+extern cl_command_queue queue;
+extern cl_context context;
+extern bool            gDebugTrace, gExtraValidateInfo, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestRounding;
+extern cl_device_type    gDeviceType;
+extern bool            gUseKernelSamplers;
+extern cl_filter_mode    gFilterModeToUse;
+extern cl_addressing_mode    gAddressModeToUse;
+extern uint64_t gRoundingStartValue;
+extern cl_mem_flags gMemFlagsToUse;
+
+// NOTE(review): these look like the starting values for the numTries /
+// numClamped counters that determine_validation_error() decrements - confirm
+// against the callers.
+#define MAX_TRIES               1
+#define MAX_CLAMPED             1
+
+// printf-style template for the 2D read kernel. It has five %s slots, in
+// order: extra kernel argument text after the image, the element type of the
+// results buffer (vectorized as <type>4), text inserted at the top of the
+// body, the coordinate setup, and the read_image suffix.
+// NOTE(review): the slot contents are filled in outside this block; the
+// strings below (samplerKernelArg, intCoordKernelSource, floatKernelSource)
+// look like the values for the first and fourth slots - confirm.
+const char *read2DKernelSourcePattern = 
+"__kernel void sample_kernel( read_only image2d_t input,%s __global float *xOffsets, __global float *yOffsets, __global %s4 *results )\n"
+"{\n"
+"%s"
+"   int tidX = get_global_id(0), tidY = get_global_id(1);\n"
+"   int offset = tidY*get_image_width(input) + tidX;\n"
+"%s"
+"   results[offset] = read_image%s( input, imageSampler, coords );\n"
+"}";
+
+// Kernel snippet that builds integer coordinates from the offset buffers.
+const char *intCoordKernelSource = 
+"   int2 coords = (int2)( xOffsets[offset], yOffsets[offset]);\n";
+
+// Kernel snippet that builds float coordinates from the offset buffers.
+const char *floatKernelSource = 
+"   float2 coords = (float2)( (float)( xOffsets[offset] ), (float)( yOffsets[offset] ) );\n";
+
+// Kernel argument text declaring a sampler parameter named imageSampler.
+static const char *samplerKernelArg = " sampler_t imageSampler,";
+
+// Absolute difference between two values, computed in single precision.
+// Both arguments are parenthesized so that an expression argument is cast as
+// a whole rather than only its first operand.
+#define ABS_ERROR( result, expected ) ( fabsf( (float)(expected) - (float)(result) ) )
+
+extern void read_image_pixel_float( void *imageData, image_descriptor *imageInfo,
+                            int x, int y, int z, float *outData );
+
+// Logs one failing sample: its index, its coordinates, and the expected and
+// actual four-channel values. With printAsFloat the channels are printed with
+// %g followed by the error; otherwise they are printed as hex integers.
+// errorOnOwnLine selects which of the two float layouts is used (error on a
+// line of its own, or appended to the "got" line).
+template <class T> static void log_sample_mismatch( size_t j, float x, float y, T *expected, T *resultPtr,
+                                                    float error, bool printAsFloat, bool errorOnOwnLine )
+{
+    if( !printAsFloat )
+    {
+        log_error( "Sample %d: coord {%f(%a), %f(%a)} did not validate!\n\tExpected (%x,%x,%x,%x),\n\tgot      (%x,%x,%x,%x)\n",
+                  (int)j, x, x, y, y, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ],
+                  (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] );
+        return;
+    }
+
+    if( errorOnOwnLine )
+    {
+        log_error( "Sample %d: coord {%f(%a), %f(%a)} did not validate!\n\tExpected (%g,%g,%g,%g),\n\tgot      (%g,%g,%g,%g),\n\terror of %g\n",
+                  (int)j, x, x, y, y, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ],
+                  (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error );
+    }
+    else
+    {
+        log_error( "Sample %d: coord {%f(%a), %f(%a)} did not validate!\n\tExpected (%g,%g,%g,%g),\n\tgot      (%g,%g,%g,%g), error of %g\n",
+                  (int)j, x, x, y, y, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ],
+                  (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error );
+    }
+}
+
+// Logs a window of image pixels around (clampedX, clampedY): columns
+// clampedX - 2 .. clampedX + 1 for each row clampedY + yOff, with yOff going
+// from firstYOff to lastYOff inclusive. Pixels are read with
+// read_image_pixel_float at z = 0.
+static void log_nearby_pixel_rows( void *imagePtr, image_descriptor *imageInfo,
+                                   int clampedX, int clampedY, int firstYOff, int lastYOff )
+{
+    log_error( "Nearby values:\n" );
+    log_error( "\t%d\t%d\t%d\t%d\n", clampedX - 2, clampedX - 1, clampedX, clampedX + 1 );
+    for( int yOff = firstYOff; yOff <= lastYOff; yOff++ )
+    {
+        float px[ 4 ][ 4 ];
+        for( int col = 0; col < 4; col++ )
+            read_image_pixel_float( imagePtr, imageInfo, clampedX - 2 + col, clampedY + yOff, 0, px[ col ] );
+
+        log_error( "%d\t(%g,%g,%g,%g)", clampedY + yOff, px[ 0 ][ 0 ], px[ 0 ][ 1 ], px[ 0 ][ 2 ], px[ 0 ][ 3 ] );
+        log_error( " (%g,%g,%g,%g)", px[ 1 ][ 0 ], px[ 1 ][ 1 ], px[ 1 ][ 2 ], px[ 1 ][ 3 ] );
+        log_error( " (%g,%g,%g,%g)", px[ 2 ][ 0 ], px[ 2 ][ 1 ], px[ 2 ][ 2 ], px[ 2 ][ 3 ] );
+        log_error( " (%g,%g,%g,%g)\n", px[ 3 ][ 0 ], px[ 3 ][ 1 ], px[ 3 ][ 2 ], px[ 3 ][ 3 ] );
+    }
+}
+
+// Classifies and logs a sample that failed validation.
+//
+// The result value is searched for in the source image and the sample's
+// coordinates are run through get_integer_coords_offset. Two cases count
+// against numClamped: the value sits exactly at the clamped coordinate, or the
+// coordinate was clamped and all four result channels are zero. Everything
+// else is logged in full and counts against numTries.
+//
+// numTries and numClamped are decremented in place. After a non-clamping
+// failure numClamped is set to -1 so that it can no longer reach zero.
+//
+// Returns 1 when one of the counters reaches zero, otherwise 0.
+template <class T> int determine_validation_error( void *imagePtr, image_descriptor *imageInfo, image_sampler_data *imageSampler,
+                                                T *resultPtr, T * expected, float error,
+                                float x, float y, float xAddressOffset, float yAddressOffset, size_t j, int &numTries, int &numClamped, bool printAsFloat )
+{
+    int actualX, actualY;
+    int found = debug_find_pixel_in_image( imagePtr, imageInfo, resultPtr, &actualX, &actualY, NULL );
+    bool clampingErr = false, otherClampingBug = false;
+    int clampedX, clampedY, ignoreMe;
+
+    bool clamped = get_integer_coords_offset( x, y, 0.f, xAddressOffset, yAddressOffset, 0.0f, imageInfo->width, imageInfo->height, 0, imageSampler, imageInfo, clampedX, clampedY, ignoreMe );
+
+    // Is it a clamping bug? (the value read is the one at the clamped coordinate)
+    if( found && clamped && clampedX == actualX && clampedY == actualY )
+    {
+        if( (--numClamped) == 0 )
+        {
+            // Cast so the arguments match %ld whatever the field type is.
+            log_error( "ERROR: TEST FAILED: Read is erroneously clamping coordinates for image size %ld x %ld!\n", (long)imageInfo->width, (long)imageInfo->height );
+            log_sample_mismatch( j, x, y, expected, resultPtr, error, printAsFloat, true );
+            return 1;
+        }
+        clampingErr = true;
+        otherClampingBug = true;
+    }
+
+    // If we are in clamp-to-edge mode and we're getting zeroes, it's possible we're getting border erroneously
+    if( clamped && !otherClampingBug &&
+        resultPtr[ 0 ] == 0 && resultPtr[ 1 ] == 0 && resultPtr[ 2 ] == 0 && resultPtr[ 3 ] == 0 )
+    {
+        if( (--numClamped) == 0 )
+        {
+            log_error( "ERROR: TEST FAILED: Clamping is erroneously returning border color for image size %ld x %ld!\n", (long)imageInfo->width, (long)imageInfo->height );
+            log_sample_mismatch( j, x, y, expected, resultPtr, error, printAsFloat, true );
+            return 1;
+        }
+        clampingErr = true;
+    }
+
+    if( clampingErr )
+        return 0;
+
+    log_sample_mismatch( j, x, y, expected, resultPtr, error, printAsFloat, false );
+    log_error( "img size %ld,%ld (pitch %ld)", (long)imageInfo->width, (long)imageInfo->height, (long)imageInfo->rowPitch );
+    if( clamped )
+    {
+        log_error( " which would clamp to %d,%d\n", clampedX, clampedY );
+    }
+    else
+    {
+        // Finish the "img size" line so later output starts on a fresh line.
+        log_error( "\n" );
+    }
+
+    if( printAsFloat && gExtraValidateInfo )
+    {
+        log_nearby_pixel_rows( imagePtr, imageInfo, clampedX, clampedY, -2, 1 );
+
+        // For a coordinate in the top row, also dump rows offset by the image height.
+        if( clampedY < 1 )
+            log_nearby_pixel_rows( imagePtr, imageInfo, clampedX, clampedY,
+                                   (int)imageInfo->height - 2, (int)imageInfo->height + 1 );
+    }
+
+    if( imageSampler->filter_mode != CL_FILTER_LINEAR )
+    {
+        if( found )
+            log_error( "\tValue really found in image at %d,%d (%s)\n", actualX, actualY, ( found > 1 ) ? "NOT unique!!" : "unique" );
+        else
+            log_error( "\tValue not actually found in image\n" );
+    }
+    log_error( "\n" );
+
+    numClamped = -1; // We force the clamped counter to never work
+    if( ( --numTries ) == 0 )
+    {
+        return 1;
+    }
+    return 0;
+}
+
+#define CLAMP( v, lo, hi )                  ( (v) < (lo) ? (lo) : ( (v) > (hi) ? (hi) : (v) ) ) // Limit v to [lo, hi]; lo is tested first, and arguments may be evaluated more than once.
+
+// Fill xOffsets/yOffsets with one sampling coordinate per image pixel, stored row by row.
+// Each coordinate is the pixel index plus xfract/yfract; unless gDisableOffsets is set, the
+// index is first shifted by the value of random_in_range( -10, 10, d ), drawn for x and then y.
+// CL_ADDRESS_NONE samplers then get every coordinate clamped to [0, dimension - 1], and a
+// non-zero normalized_coords finally divides the coordinates by the image width/height.
+static void InitFloatCoords( image_descriptor *imageInfo, image_sampler_data *imageSampler, float *xOffsets, float *yOffsets, float xfract, float yfract, int normalized_coords, MTdata d )
+{
+    const int jitterEnabled = !gDisableOffsets;
+    size_t count = 0;
+
+    // Pass 1: base coordinates; count ends up as the number of entries written.
+    for( size_t row = 0; row < imageInfo->height; row++ )
+    {
+        for( size_t col = 0; col < imageInfo->width; col++, count++ )
+        {
+            double colPos = (double) col;
+            double rowPos = (double) row;
+            if( jitterEnabled )
+            {
+                // The x shift is drawn before the y shift, so the order of draws from d stays fixed.
+                colPos = (double) ((int) col + random_in_range( -10, 10, d ));
+                rowPos = (double) ((int) row + random_in_range( -10, 10, d ));
+            }
+            xOffsets[ count ] = (float) (xfract + colPos);
+            yOffsets[ count ] = (float) (yfract + rowPos);
+        }
+    }
+
+    // Pass 2: with CL_ADDRESS_NONE, pull every coordinate back inside the image.
+    if( imageSampler->addressing_mode == CL_ADDRESS_NONE )
+    {
+        const double maxCol = (double) imageInfo->width - 1.0;
+        const double maxRow = (double) imageInfo->height - 1.0;
+        for( size_t k = 0; k < count; k++ )
+        {
+            xOffsets[ k ] = (float) CLAMP( (double) xOffsets[ k ], 0.0, maxCol );
+            yOffsets[ k ] = (float) CLAMP( (double) yOffsets[ k ], 0.0, maxRow );
+        }
+    }
+
+    // Pass 3: convert pixel coordinates to normalized ones when requested.
+    if( normalized_coords )
+    {
+        const double widthAsDouble = (double) imageInfo->width;
+        const double heightAsDouble = (double) imageInfo->height;
+        for( size_t k = 0; k < count; k++ )
+        {
+            xOffsets[ k ] = (float) ((double) xOffsets[ k ] / widthAsDouble);
+            yOffsets[ k ] = (float) ((double) yOffsets[ k ] / heightAsDouble);
+        }
+    }
+}
+
+#if !defined( MAX ) // Define MAX only when nothing earlier has already defined it.
+    #define MAX( lhs, rhs )         ( (lhs) > (rhs) ? (lhs) : (rhs) ) // Larger argument; yields rhs when lhs is not greater. Arguments may be evaluated twice.
+#endif
+
+
+int test_read_image_2D( cl_device_id device, cl_context context, cl_command_queue queue, cl_kernel kernel,
+                        image_descriptor *imageInfo, image_sampler_data *imageSampler, 
+                       bool useFloatCoords, ExplicitType outputType, MTdata d )
+{
+    int error;
+    static int initHalf = 0;
+
+    size_t threads[2];
+    
+    clMemWrapper xOffsets, yOffsets, results;
+    clSamplerWrapper actualSampler;
+    BufferOwningPtr<char> maxImageUseHostPtrBackingStore;
+
+    // The DataBuffer template class really does use delete[], not free -- IRO
+    BufferOwningPtr<cl_float> xOffsetValues(malloc(sizeof(cl_float) * imageInfo->width * imageInfo->height));
+    BufferOwningPtr<cl_float> yOffsetValues(malloc(sizeof(cl_float) * imageInfo->width * imageInfo->height));
+
+    if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
+        if( DetectFloatToHalfRoundingMode(queue) )
+            return 1;
+        
+    // generate_random_image_data allocates with malloc, so we use a MallocDataBuffer here
+    BufferOwningPtr<char> imageValues;
+    generate_random_image_data( imageInfo, imageValues, d );
+    
+    if( gDebugTrace )
+        log_info( " - Creating image %d by %d...\n", (int)imageInfo->width, (int)imageInfo->height );
+  
+    // Construct testing sources
+    clProtectedImage protImage;
+    clMemWrapper unprotImage;
+    cl_mem image;
+    
+    if( gMemFlagsToUse == CL_MEM_USE_HOST_PTR )
+    {
+        // clProtectedImage uses USE_HOST_PTR, so just rely on that for the testing (via Ian)
+        // Do not use protected images for max image size test since it rounds the row size to a page size
+        if (gTestMaxImages) {
+          generate_random_image_data( imageInfo, maxImageUseHostPtrBackingStore, d );
+          unprotImage = create_image_2d( context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, imageInfo->format, 
+                                        imageInfo->width, imageInfo->height, ( gEnablePitch ? imageInfo->rowPitch : 0 ),
+                                        maxImageUseHostPtrBackingStore, &error );
+        } else {
+                error = protImage.Create( context, (cl_mem_flags)(CL_MEM_READ_ONLY), imageInfo->format, imageInfo->width, imageInfo->height );
+        }
+        if( error != CL_SUCCESS )
+        {
+            log_error( "ERROR: Unable to create 2D image of size %d x %d pitch %d (%s)\n", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->rowPitch, IGetErrorString( error ) );
+            return error;
+        }
+        
+        if (gTestMaxImages)
+            image = (cl_mem)unprotImage;
+        else
+            image = (cl_mem)protImage;
+    }
+    else if( gMemFlagsToUse == CL_MEM_COPY_HOST_PTR )
+    {
+        // Don't use clEnqueueWriteImage; just use copy host ptr to get the data in
+        unprotImage = create_image_2d( context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, imageInfo->format, 
+                                      imageInfo->width, imageInfo->height, ( gEnablePitch ? imageInfo->rowPitch : 0 ),
+                                      imageValues, &error );
+        if( error != CL_SUCCESS )
+        {
+            log_error( "ERROR: Unable to create 2D image of size %d x %d pitch %d (%s)\n", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->rowPitch, IGetErrorString( error ) );
+            return error;
+        }
+        image = unprotImage;
+    }
+    else // Either CL_MEM_ALLOC_HOST_PTR or none
+    {
+        // Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can be accessed by the host, but otherwise
+        // it works just as if no flag is specified, so we just do the same thing either way
+        unprotImage = create_image_2d( context, CL_MEM_READ_ONLY | gMemFlagsToUse, imageInfo->format, 
+                                      imageInfo->width, imageInfo->height, ( gEnablePitch ? imageInfo->rowPitch : 0 ),
+                                      imageValues, &error );
+        if( error != CL_SUCCESS )
+        {
+            log_error( "ERROR: Unable to create 2D image of size %d x %d pitch %d (%s)\n", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->rowPitch, IGetErrorString( error ) );
+            return error;
+        }
+        image = unprotImage;
+    }
+  
+    if( gMemFlagsToUse != CL_MEM_COPY_HOST_PTR )
+    {
+        if( gDebugTrace )
+            log_info( " - Writing image...\n" );
+        
+        size_t origin[ 3 ] = { 0, 0, 0 };
+        size_t region[ 3 ] = { imageInfo->width, imageInfo->height, 1 };
+        
+        error = clEnqueueWriteImage(queue, image, CL_TRUE,
+                                    origin, region, ( gEnablePitch ? imageInfo->rowPitch : 0 ), 0,
+                                   imageValues, 0, NULL, NULL);
+        if (error != CL_SUCCESS) 
+        {
+            log_error( "ERROR: Unable to write to 2D image of size %d x %d\n", (int)imageInfo->width, (int)imageInfo->height );
+            return error;
+        }
+    }
+
+    if( gDebugTrace )
+        log_info( " - Creating kernel arguments...\n" );
+    
+    xOffsets = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), sizeof( cl_float ) * imageInfo->width * imageInfo->height, xOffsetValues, &error );
+    test_error( error, "Unable to create x offset buffer" );
+    yOffsets = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), sizeof( cl_float ) * imageInfo->width * imageInfo->height, yOffsetValues, &error );
+    test_error( error, "Unable to create y offset buffer" );
+    results = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  get_explicit_type_size( outputType ) * 4 * imageInfo->width * imageInfo->height, NULL, &error );
+    test_error( error, "Unable to create result buffer" );
+    
+    // Create sampler to use
+    actualSampler = clCreateSampler( context, (cl_bool)imageSampler->normalized_coords, imageSampler->addressing_mode, imageSampler->filter_mode, &error );
+    test_error( error, "Unable to create image sampler" );
+    
+    // Set arguments
+    int idx = 0;
+    error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &image );
+    test_error( error, "Unable to set kernel arguments" );
+    if( !gUseKernelSamplers )
+    {
+        error = clSetKernelArg( kernel, idx++, sizeof( cl_sampler ), &actualSampler );
+        test_error( error, "Unable to set kernel arguments" );
+    }
+    error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &xOffsets );
+    test_error( error, "Unable to set kernel arguments" );
+    error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &yOffsets );
+    test_error( error, "Unable to set kernel arguments" );
+    error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &results );
+    test_error( error, "Unable to set kernel arguments" );
+    
+    // A cast of troublesome offsets. The first one has to be zero.
+    const float float_offsets[] = { 0.0f, MAKE_HEX_FLOAT(0x1.0p-30f, 0x1L, -30), 0.25f, 0.3f, 0.5f - FLT_EPSILON/4.0f, 0.5f, 0.9f, 1.0f - FLT_EPSILON/2 };
+    int float_offset_count = sizeof( float_offsets) / sizeof( float_offsets[0] );
+    int numTries = MAX_TRIES, numClamped = MAX_CLAMPED;
+    int loopCount = 2 * float_offset_count;
+    if( ! useFloatCoords )
+        loopCount = 1;
+    if (gTestMaxImages) {
+        loopCount = 1;
+      log_info("Testing each size only once with pixel offsets of %g for max sized images.\n", float_offsets[0]);
+    }
+  
+	// Get the maximum absolute error for this format
+  	double formatAbsoluteError = get_max_absolute_error(imageInfo->format, imageSampler); 
+  	if (gDebugTrace) log_info("\tformatAbsoluteError is %e\n", formatAbsoluteError);
+
+    if (0 == initHalf && imageInfo->format->image_channel_data_type == CL_HALF_FLOAT ) {
+        initHalf = CL_SUCCESS == DetectFloatToHalfRoundingMode( queue );
+        if (initHalf) {
+            log_info("Half rounding mode successfully detected.\n");
+        }
+    }
+
+    for( int q = 0; q < loopCount; q++ )
+    {
+        float offset = float_offsets[ q % float_offset_count ];
+
+        // Init the coordinates
+        InitFloatCoords( imageInfo, imageSampler, xOffsetValues, yOffsetValues, 
+                            q>=float_offset_count ? -offset: offset, 
+                            q>=float_offset_count ? offset: -offset, imageSampler->normalized_coords, d );
+
+        error = clEnqueueWriteBuffer( queue, xOffsets, CL_TRUE, 0, sizeof(cl_float) * imageInfo->height * imageInfo->width, xOffsetValues, 0, NULL, NULL );
+        test_error( error, "Unable to write x offsets" );
+        error = clEnqueueWriteBuffer( queue, yOffsets, CL_TRUE, 0, sizeof(cl_float) * imageInfo->height * imageInfo->width, yOffsetValues, 0, NULL, NULL );
+        test_error( error, "Unable to write y offsets" );
+        
+        // Get results
+        size_t resultValuesSize = imageInfo->width * imageInfo->height * get_explicit_type_size( outputType ) * 4;
+        BufferOwningPtr<char> resultValues(malloc(resultValuesSize));
+        memset( resultValues, 0xff, resultValuesSize );
+        clEnqueueWriteBuffer( queue, results, CL_TRUE, 0, resultValuesSize, resultValues, 0, NULL, NULL );
+
+        // Run the kernel
+        threads[0] = (size_t)imageInfo->width;
+        threads[1] = (size_t)imageInfo->height;
+        error = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL );
+        test_error( error, "Unable to run kernel" );
+        
+        if( gDebugTrace )
+            log_info( "    reading results, %ld kbytes\n", (unsigned long)( imageInfo->width * imageInfo->height * get_explicit_type_size( outputType ) * 4 / 1024 ) );
+     
+        error = clEnqueueReadBuffer( queue, results, CL_TRUE, 0, imageInfo->width * imageInfo->height * get_explicit_type_size( outputType ) * 4, resultValues, 0, NULL, NULL );
+        test_error( error, "Unable to read results from kernel" );
+        if( gDebugTrace )
+            log_info( "    results read\n" );
+          
+        // Validate results element by element
+        char *imagePtr = imageValues;
+                /*
+                 * FLOAT output type
+                 */
+        if( outputType == kFloat )
+        {
+            // Validate float results
+            float *resultPtr = (float *)(char *)resultValues;
+            float expected[4], error=0.0f;
+            float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 0 /*not 3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode );
+            for( size_t y = 0, j = 0; y < imageInfo->height; y++ )
+            {
+                for( size_t x = 0; x < imageInfo->width; x++, j++ )
+                {
+                    // Step 1: go through and see if the results verify for the pixel
+                    // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
+                    // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
+                    int checkOnlyOnePixel = 0;
+                    int found_pixel = 0;
+                    float offset = NORM_OFFSET;
+                    if (!imageSampler->normalized_coords ||  imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0
+#if defined( __APPLE__ )
+                        // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes
+                        || gDeviceType != CL_DEVICE_TYPE_GPU 
+#endif
+                    ) 
+                        offset = 0.0f;          // Loop only once
+                        
+                    for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel; norm_offset_x += NORM_OFFSET) {
+                        for (float norm_offset_y = -offset; norm_offset_y <= offset && !found_pixel; norm_offset_y += NORM_OFFSET) {
+                            
+                            
+                            // Try sampling the pixel, without flushing denormals.
+                            int containsDenormals = 0;
+                            FloatPixel maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo, 
+                                                                            xOffsetValues[ j ], yOffsetValues[ j ], 0.0f, norm_offset_x, norm_offset_y, 0.0f,
+                                                                            imageSampler, expected, 0, &containsDenormals );
+                            
+                            float err1 = fabsf( resultPtr[0] - expected[0] );
+                            float err2 = fabsf( resultPtr[1] - expected[1] );
+                            float err3 = fabsf( resultPtr[2] - expected[2] );
+                            float err4 = fabsf( resultPtr[3] - expected[3] );
+                            // Clamp to the minimum absolute error for the format
+                            if (err1 > 0 && err1 < formatAbsoluteError) { err1 = 0.0f; }
+                            if (err2 > 0 && err2 < formatAbsoluteError) { err2 = 0.0f; }
+                            if (err3 > 0 && err3 < formatAbsoluteError) { err3 = 0.0f; }
+                            if (err4 > 0 && err4 < formatAbsoluteError) { err4 = 0.0f; }
+                            float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN );
+                            float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN );
+                            float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN );
+                            float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN );
+                            
+                            // Check if the result matches.
+                            if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2)    ||
+                                ! (err3 <= maxErr3) || ! (err4 <= maxErr4)    )
+                            {
+                                //try flushing the denormals, if there is a failure.
+                                if( containsDenormals )
+                                {
+                                   // If implementation decide to flush subnormals to zero, 
+                                   // max error needs to be adjusted
+                                    maxErr1 += 4 * FLT_MIN;
+                                    maxErr2 += 4 * FLT_MIN;
+                                    maxErr3 += 4 * FLT_MIN;
+                                    maxErr4 += 4 * FLT_MIN;
+                                    
+                                    maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo, 
+                                                                                 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
+                                                                                 imageSampler, expected, 0, NULL );
+                                    
+                                    err1 = fabsf( resultPtr[0] - expected[0] );
+                                    err2 = fabsf( resultPtr[1] - expected[1] );
+                                    err3 = fabsf( resultPtr[2] - expected[2] );
+                                    err4 = fabsf( resultPtr[3] - expected[3] );                    
+                                }
+                            }
+                            
+                            // If the final result DOES match, then we've found a valid result and we're done with this pixel.
+                            found_pixel = (err1 <= maxErr1) && (err2 <= maxErr2)  && (err3 <= maxErr3) && (err4 <= maxErr4);                             
+                        }//norm_offset_x
+                    }//norm_offset_y
+                    
+                    
+                    // Step 2: If we did not find a match, then print out debugging info.
+                    if (!found_pixel) {
+                        // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
+                        // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
+                        checkOnlyOnePixel = 0;
+                        int shouldReturn = 0;
+                        for (float norm_offset_x = -offset; norm_offset_x <= offset && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
+                            for (float norm_offset_y = -offset; norm_offset_y <= offset && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
+                                
+                                // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
+                                // E.g., test one pixel.
+                                if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
+                                    norm_offset_x = 0.0f;
+                                    norm_offset_y = 0.0f;
+                                    checkOnlyOnePixel = 1;
+                                }
+                                
+                                int containsDenormals = 0;
+                                FloatPixel maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo, 
+                                                                                        xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
+                                                                                        imageSampler, expected, 0, &containsDenormals );
+                                
+                                float err1 = fabsf( resultPtr[0] - expected[0] );
+                                float err2 = fabsf( resultPtr[1] - expected[1] );
+                                float err3 = fabsf( resultPtr[2] - expected[2] );
+                                float err4 = fabsf( resultPtr[3] - expected[3] );
+                                float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN );
+                                float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN );
+                                float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN );
+                                float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN );
+                                
+                                
+                                if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2)    ||
+                                    ! (err3 <= maxErr3) || ! (err4 <= maxErr4)    )
+                                {
+                                    //try flushing the denormals, if there is a failure.
+                                    if( containsDenormals )
+                                    {
+                                        maxErr1 += 4 * FLT_MIN;
+                                        maxErr2 += 4 * FLT_MIN;
+                                        maxErr3 += 4 * FLT_MIN;
+                                        maxErr4 += 4 * FLT_MIN;                                    
+                                    
+                                        maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo, 
+                                                                                     xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
+                                                                                     imageSampler, expected, 0, NULL );
+                                        
+                                        err1 = fabsf( resultPtr[0] - expected[0] );
+                                        err2 = fabsf( resultPtr[1] - expected[1] );
+                                        err3 = fabsf( resultPtr[2] - expected[2] );
+                                        err4 = fabsf( resultPtr[3] - expected[3] );                    
+                                    }
+                                }
+                                if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2)    ||
+                                    ! (err3 <= maxErr3) || ! (err4 <= maxErr4)    )
+                                {                       
+                                    log_error("FAILED norm_offsets: %g , %g:\n", norm_offset_x, norm_offset_y);
+                                    
+                                    float tempOut[4];
+                                    shouldReturn |= determine_validation_error<float>( imagePtr, imageInfo, imageSampler, resultPtr,
+                                                                                        expected, error, xOffsetValues[ j ], yOffsetValues[ j ], norm_offset_x, norm_offset_y, j, numTries, numClamped, true );
+                                    
+                                    log_error( "Step by step:\n" );
+                                    FloatPixel temp = sample_image_pixel_float_offset( imageValues, imageInfo, 
+                                                                                        xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
+                                                                                        imageSampler, tempOut, 1 /* verbose */, &containsDenormals /*dont flush while error reporting*/ );
+                                    log_error( "\tulps: %2.2f, %2.2f, %2.2f, %2.2f  (max allowed: %2.2f)\n\n",    
+                                                        Ulp_Error( resultPtr[0], expected[0] ),
+                                                        Ulp_Error( resultPtr[1], expected[1] ),
+                                                        Ulp_Error( resultPtr[2], expected[2] ),
+                                                        Ulp_Error( resultPtr[3], expected[3] ),
+                                                        Ulp_Error( MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) + maxErr, MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) ) );
+                                    
+                                } else {
+                                    log_error("Test error: we should have detected this passing above.\n");
+                                }
+                                
+                            }//norm_offset_x
+                        }//norm_offset_y
+                        if( shouldReturn )
+                            return 1;
+                    } // if (!found_pixel)
+                    
+                    resultPtr += 4;
+                }
+            }
+        }
+            /*
+             * UINT output type
+             */            
+        else if( outputType == kUInt )
+        {
+            // Validate unsigned integer results
+            unsigned int *resultPtr = (unsigned int *)(char *)resultValues;
+            unsigned int expected[4];
+            float error;
+            for( size_t y = 0, j = 0; y < imageInfo->height; y++ )
+            {
+                for( size_t x = 0; x < imageInfo->width; x++, j++ )
+                {
+                    // Step 1: go through and see if the results verify for the pixel
+                    // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
+                    // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
+                    int checkOnlyOnePixel = 0;
+                    int found_pixel = 0;
+                    for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
+                        for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
+                            
+                            // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
+                            // E.g., test one pixel.
+                            if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
+                                norm_offset_x = 0.0f;
+                                norm_offset_y = 0.0f;
+                                checkOnlyOnePixel = 1;
+                            }
+                            
+                            sample_image_pixel_offset<unsigned int>( imageValues, imageInfo, 
+                                                                                             xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, 
+                                                                                             imageSampler, expected );
+                            
+                            
+                            error = errMax( errMax( abs_diff_uint(expected[ 0 ], resultPtr[ 0 ]), abs_diff_uint(expected[ 1 ], resultPtr[ 1 ]) ),
+                                                         errMax( abs_diff_uint(expected[ 2 ], resultPtr[ 2 ]), abs_diff_uint(expected[ 3 ], resultPtr[ 3 ]) ) );
+                            
+                            if (error <= MAX_ERR) 
+                                found_pixel = 1;
+                        }//norm_offset_x
+                    }//norm_offset_y
+                    
+                    // Step 2: If we did not find a match, then print out debugging info.
+                    if (!found_pixel) {
+                        // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
+                        // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
+                        checkOnlyOnePixel = 0;
+                        int shouldReturn = 0;
+                        for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
+                            for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
+                                
+                                // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
+                                // E.g., test one pixel.
+                                if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
+                                    norm_offset_x = 0.0f;
+                                    norm_offset_y = 0.0f;
+                                    checkOnlyOnePixel = 1;
+                                }
+                                
+                                sample_image_pixel_offset<unsigned int>( imageValues, imageInfo, 
+                                                                                                 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, 
+                                                                                                 imageSampler, expected );
+                                
+                                
+                                error = errMax( errMax( abs_diff_uint(expected[ 0 ], resultPtr[ 0 ]), abs_diff_uint(expected[ 1 ], resultPtr[ 1 ]) ),
+                                                             errMax( abs_diff_uint(expected[ 2 ], resultPtr[ 2 ]), abs_diff_uint(expected[ 3 ], resultPtr[ 3 ]) ) );
+
+                                if( error > MAX_ERR )
+                                {
+                                    log_error("FAILED norm_offsets: %g , %g:\n", norm_offset_x, norm_offset_y);
+
+                                    shouldReturn |= determine_validation_error<unsigned int>( imagePtr, imageInfo, imageSampler, resultPtr,
+                                                                                                                     expected, error, xOffsetValues[j], yOffsetValues[j], norm_offset_x, norm_offset_y, j, numTries, numClamped, false );
+                                } else {
+                                    log_error("Test error: we should have detected this passing above.\n");
+                                }
+                            }//norm_offset_x
+                        }//norm_offset_y
+                        if( shouldReturn )
+                            return 1;
+                    } // if (!found_pixel)
+                    
+                    resultPtr += 4;
+                }
+            }
+        }
+            /*
+             * INT output type
+             */                        
+        else
+        {
+            // Validate integer results
+            int *resultPtr = (int *)(char *)resultValues;
+            int expected[4];
+            float error;
+            for( size_t y = 0, j = 0; y < imageInfo->height; y++ )
+            {
+                for( size_t x = 0; x < imageInfo->width; x++, j++ )
+                {
+                    // Step 1: go through and see if the results verify for the pixel
+                    // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
+                    // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
+                    int checkOnlyOnePixel = 0;
+                    int found_pixel = 0;
+                    for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
+                        for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
+                            
+                            // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
+                            // E.g., test one pixel.
+                            if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
+                                norm_offset_x = 0.0f;
+                                norm_offset_y = 0.0f;
+                                checkOnlyOnePixel = 1;
+                            }
+                            
+                            sample_image_pixel_offset<int>( imageValues, imageInfo, 
+                                                            xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, 
+                                                            imageSampler, expected );
+                            
+                            
+                            error = errMax( errMax( abs_diff_int(expected[ 0 ], resultPtr[ 0 ]), abs_diff_int(expected[ 1 ], resultPtr[ 1 ]) ),
+                                                         errMax( abs_diff_int(expected[ 2 ], resultPtr[ 2 ]), abs_diff_int(expected[ 3 ], resultPtr[ 3 ]) ) );
+                            
+                            if (error <= MAX_ERR) 
+                                found_pixel = 1;
+                        }//norm_offset_x
+                    }//norm_offset_y
+                    
+                    // Step 2: If we did not find a match, then print out debugging info.
+                    if (!found_pixel) {
+                        // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
+                        // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
+                        checkOnlyOnePixel = 0;
+                        int shouldReturn = 0;
+                        for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
+                            for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
+                                
+                                // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
+                                // E.g., test one pixel.
+                                if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
+                                    norm_offset_x = 0.0f;
+                                    norm_offset_y = 0.0f;
+                                    checkOnlyOnePixel = 1;
+                                }
+                                
+                                sample_image_pixel_offset<int>( imageValues, imageInfo, 
+                                                                xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, 
+                                                                imageSampler, expected );
+                                
+                                
+                                error = errMax( errMax( abs_diff_int(expected[ 0 ], resultPtr[ 0 ]), abs_diff_int(expected[ 1 ], resultPtr[ 1 ]) ),
+                                                             errMax( abs_diff_int(expected[ 2 ], resultPtr[ 2 ]), abs_diff_int(expected[ 3 ], resultPtr[ 3 ]) ) );
+                                
+                                if( error > MAX_ERR )
+                                {
+                                    log_error("FAILED norm_offsets: %g , %g:\n", norm_offset_x, norm_offset_y);
+                                    
+                                    shouldReturn |= determine_validation_error<int>( imagePtr, imageInfo, imageSampler, resultPtr,
+                                                                                     expected, error, xOffsetValues[j], yOffsetValues[j], norm_offset_x, norm_offset_y, j, numTries, numClamped, false );
+                                } else {
+                                    log_error("Test error: we should have detected this passing above.\n");
+                                }
+                            }//norm_offset_x
+                        }//norm_offset_y
+                        if( shouldReturn )
+                            return 1;
+                    } // if (!found_pixel)
+                    
+                    resultPtr += 4;
+                }
+            }
+        }    
+    }
+  
+    return numTries != MAX_TRIES || numClamped != MAX_CLAMPED;
+}
+
+/**
+ * Builds the 2D read_image test kernel for the requested coordinate kind and output type,
+ * then runs test_read_image_2D over a series of image sizes. The size series is picked by
+ * the global mode flags, checked in this order: gTestSmallImages, gTestMaxImages,
+ * gTestRounding, and otherwise NUM_IMAGE_ITERATIONS randomly sized images.
+ *
+ * @param device        device whose 2D image and memory limits are queried
+ * @param format        image format under test; stored in the image descriptor
+ * @param imageSampler  sampler description; also turned into in-kernel sampler code
+ *                      when gUseKernelSamplers is set
+ * @param floatCoords   true to build the kernel with float coordinates, false for integer
+ * @param outputType    kInt or kUInt select the integer read variants; anything else reads float
+ * @return 0 when every tested size passed, otherwise the first non-zero result of
+ *         test_read_image_2D (setup failures are reported through test_error)
+ */
+int test_read_image_set_2D( cl_device_id device, cl_image_format *format, image_sampler_data *imageSampler, 
+                        bool floatCoords, ExplicitType outputType )
+{
+    char programSrc[10240];
+    const char *ptr;
+    const char *readFormat;
+    clProgramWrapper program;
+    clKernelWrapper kernel;
+    RandomSeed seed( gRandomSeed );
+    int error;
+
+    // Get our operating params
+    size_t maxWidth, maxHeight;
+    cl_ulong maxAllocSize, memSize;
+    image_descriptor imageInfo = { 0x0 };
+    size_t pixelSize;
+
+    imageInfo.format = format;
+    imageInfo.depth = imageInfo.arraySize = imageInfo.slicePitch = 0;
+    imageInfo.type = CL_MEM_OBJECT_IMAGE2D;
+    pixelSize = get_pixel_size( imageInfo.format );
+
+    error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL );
+    error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL );
+    error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL );
+    error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
+    test_error( error, "Unable to get max image 2D size from device" );
+
+    // Suffix appended to "read_image" in the kernel source
+    if( outputType == kInt )
+        readFormat = "i";
+    else if( outputType == kUInt )
+        readFormat = "ui";
+    else // kFloat
+        readFormat = "f";
+
+    // Construct the source. The sampler is either a kernel argument or, when
+    // gUseKernelSamplers is set, declared inside the kernel body.
+    const char *samplerArg = samplerKernelArg;
+    char samplerVar[ 1024 ] = "";
+    if( gUseKernelSamplers )
+    {
+        get_sampler_kernel_code( imageSampler, samplerVar );
+        samplerArg = "";
+    }
+
+    sprintf( programSrc, read2DKernelSourcePattern, samplerArg, get_explicit_type_name( outputType ),
+            samplerVar,
+            floatCoords ? floatKernelSource : intCoordKernelSource,
+            readFormat );
+
+    ptr = programSrc;
+    error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "sample_kernel" );
+    test_error( error, "Unable to create testing kernel" );
+
+    if( gTestSmallImages )
+    {
+        // Exhaustively walk widths 1..12 and heights 1..8
+        for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
+        {
+            imageInfo.rowPitch = imageInfo.width * pixelSize;
+            for( imageInfo.height = 1; imageInfo.height < 9; imageInfo.height++ )
+            {
+                if( gDebugTrace )
+                    log_info( "   at size %d,%d\n", (int)imageInfo.width, (int)imageInfo.height );
+
+                int retCode = test_read_image_2D( device, context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
+                if( retCode )
+                    return retCode;
+            }
+        }
+    }
+    else if( gTestMaxImages )
+    {
+        // Try a specific set of maximum sizes
+        size_t numberOfSizes;
+        size_t sizes[100][3];
+
+        get_max_sizes(&numberOfSizes, 100, sizes, maxWidth, maxHeight, 1, 1, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE2D, imageInfo.format);
+
+        for( size_t idx = 0; idx < numberOfSizes; idx++ )
+        {
+            imageInfo.width = sizes[ idx ][ 0 ];
+            imageInfo.height = sizes[ idx ][ 1 ];
+            imageInfo.rowPitch = imageInfo.width * pixelSize;
+            log_info("Testing %d x %d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ]);
+            if( gDebugTrace )
+                log_info( "   at max size %d,%d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ] );
+            int retCode = test_read_image_2D( device, context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
+            if( retCode )
+                return retCode;
+        }
+    }
+    else if( gTestRounding )
+    {
+        // Number of distinct per-channel values, divided by the channel count per pixel
+        uint64_t typeRange = 1LL << ( get_format_type_size( imageInfo.format ) * 8 );
+        typeRange /= pixelSize / get_format_type_size( imageInfo.format );
+        imageInfo.height = (size_t)( ( typeRange + 255LL ) / 256LL );
+        imageInfo.width = (size_t)( typeRange / (cl_ulong)imageInfo.height );
+        // Trade height for width until the height is below half the device maximum
+        while( imageInfo.height >= maxHeight / 2 )
+        {
+            imageInfo.width <<= 1;
+            imageInfo.height >>= 1;
+        }
+
+        // Then shrink the width below half the device maximum
+        while( imageInfo.width >= maxWidth / 2 )
+            imageInfo.width >>= 1;
+        imageInfo.rowPitch = imageInfo.width * pixelSize;
+
+        gRoundingStartValue = 0;
+        do
+        {
+            if( gDebugTrace )
+            {
+                // uint64_t is not guaranteed to be unsigned long long, so cast to match %llu
+                log_info( "   at size %d,%d, starting round ramp at %llu for range %llu\n", (int)imageInfo.width, (int)imageInfo.height,
+                         (unsigned long long)gRoundingStartValue, (unsigned long long)typeRange );
+            }
+            int retCode = test_read_image_2D( device, context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
+            if( retCode )
+                return retCode;
+
+            // Advance by the number of channel values one image covers
+            gRoundingStartValue += imageInfo.width * imageInfo.height * pixelSize / get_format_type_size( imageInfo.format );
+
+        } while( gRoundingStartValue < typeRange );
+    }
+    else
+    {
+        for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
+        {
+            cl_ulong size;
+            // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
+            // image, the result array, plus offset arrays, will fit in the global ram space
+            do
+            {
+                imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, seed );
+                imageInfo.height = (size_t)random_log_in_range( 16, (int)maxHeight / 32, seed );
+
+                imageInfo.rowPitch = imageInfo.width * pixelSize;
+                if( gEnablePitch )
+                {
+                    size_t extraWidth = (int)random_log_in_range( 0, 64, seed );
+                    imageInfo.rowPitch += extraWidth * pixelSize;
+                }
+
+                // Multiply in 64 bits: with a 32-bit size_t the product could wrap and
+                // wrongly pass the limit checks below
+                size = (cl_ulong)imageInfo.rowPitch * (cl_ulong)imageInfo.height * 4;
+            } while(  size > maxAllocSize || ( size * 3 ) > memSize );
+
+            if( gDebugTrace )
+                log_info( "   at size %d,%d (row pitch %d) out of %d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.rowPitch, (int)maxWidth, (int)maxHeight );
+            int retCode = test_read_image_2D( device, context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
+            if( retCode )
+                return retCode;
+        }
+    }
+
+    return 0;
+}
--- a/test_conformance/images/kernel_read_write/test_loops.cpp
+++ b/test_conformance/images/kernel_read_write/test_loops.cpp
@@ -0,0 +1,374 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../testBase.h"
+
+// Globals defined in another translation unit. The filter, address, channel type and
+// channel order selectors are compared against (type)-1 in this file to mean "no restriction".
+extern cl_context context;
+extern cl_filter_mode     gFilterModeToUse;
+extern cl_addressing_mode gAddressModeToUse;
+extern int                gTypesToTest;
+// Compared against 7 in this file to mean "test both normalized and unnormalized"
+extern int                gNormalizedModeToUse;
+extern cl_channel_type	  gChannelTypeToUse;
+extern cl_channel_order	  gChannelOrderToUse;
+
+extern bool gDebugTrace;
+
+// Per-image-type read test entry points; test_read_image_type dispatches to these
+extern int test_read_image_set_1D( cl_device_id device, cl_image_format *format, image_sampler_data *imageSampler, 
+                                  bool floatCoords, ExplicitType outputType );
+extern int test_read_image_set_2D( cl_device_id device, cl_image_format *format, image_sampler_data *imageSampler, 
+                                  bool floatCoords, ExplicitType outputType );
+extern int test_read_image_set_3D( cl_device_id device, cl_image_format *format, image_sampler_data *imageSampler, 
+                                  bool floatCoords, ExplicitType outputType );
+extern int test_read_image_set_1D_array( cl_device_id device, cl_image_format *format, image_sampler_data *imageSampler, 
+                                        bool floatCoords, ExplicitType outputType );
+extern int test_read_image_set_2D_array( cl_device_id device, cl_image_format *format, image_sampler_data *imageSampler, 
+                                        bool floatCoords, ExplicitType outputType );
+
+// Labels returned by convert_image_type_to_string for log output
+static const char *str_1d_image = "1D";
+static const char *str_2d_image = "2D";
+static const char *str_3d_image = "3D";
+static const char *str_1d_image_array = "1D array";
+static const char *str_2d_image_array = "2D array";
+
+/**
+ * Maps an image object type to the short label used in log output.
+ *
+ * @param imageType  one of the CL_MEM_OBJECT_IMAGE* types
+ * @return the matching label, or "unknown" for any type not listed here, so the
+ *         result is always safe to pass to a %s format
+ */
+static const char *convert_image_type_to_string(cl_mem_object_type imageType)
+{
+    switch (imageType)
+    {
+        case CL_MEM_OBJECT_IMAGE1D:
+            return str_1d_image;
+        case CL_MEM_OBJECT_IMAGE2D:
+            return str_2d_image;
+        case CL_MEM_OBJECT_IMAGE3D:
+            return str_3d_image;
+        case CL_MEM_OBJECT_IMAGE1D_ARRAY:
+            return str_1d_image_array;
+        case CL_MEM_OBJECT_IMAGE2D_ARRAY:
+            return str_2d_image_array;
+        default:
+            // Without this case an unlisted type returned an uninitialized pointer
+            return "unknown";
+    }
+}
+
+/**
+ * Recomputes the per-format filter flags for one test pass.
+ *
+ * A format is flagged (filterFlags[i] == true, i.e. skipped) when any of these hold:
+ *   - a channel type was selected globally (gChannelTypeToUse) and this format differs;
+ *   - a channel order was selected globally (gChannelOrderToUse) and this format differs;
+ *   - IsChannelOrderSupported / IsChannelTypeSupported rejects its order or data type;
+ *   - channelDataTypesToFilter is given and does not contain its data type.
+ * Every other format has its flag cleared, whatever value it held before.
+ *
+ * @param formatList                formats to examine
+ * @param filterFlags               one flag per format, rewritten by this call
+ * @param formatCount               number of entries in formatList / filterFlags
+ * @param channelDataTypesToFilter  accepted data types, terminated by (cl_channel_type)-1,
+ *                                  or NULL to accept every data type
+ * @return number of formats whose flag ended up cleared
+ */
+int filter_formats( cl_image_format *formatList, bool *filterFlags, unsigned int formatCount, cl_channel_type *channelDataTypesToFilter )
+{
+    const cl_channel_type anyType = (cl_channel_type)-1;
+    const cl_channel_order anyOrder = (cl_channel_order)-1;
+    int keptCount = 0;
+
+    for( unsigned int idx = 0; idx < formatCount; ++idx )
+    {
+        const cl_image_format &fmt = formatList[ idx ];
+
+        // Restrictions chosen on the command line come first
+        const bool wrongType = ( gChannelTypeToUse != anyType ) && ( gChannelTypeToUse != fmt.image_channel_data_type );
+        const bool wrongOrder = ( gChannelOrderToUse != anyOrder ) && ( gChannelOrderToUse != fmt.image_channel_order );
+
+        // The order/type support checks are only consulted when the
+        // command-line selection did not already exclude the format
+        if( wrongType || wrongOrder ||
+            !IsChannelOrderSupported( fmt.image_channel_order ) ||
+            !IsChannelTypeSupported( fmt.image_channel_data_type ) )
+        {
+            filterFlags[ idx ] = true;
+            continue;
+        }
+
+        // With no type list every remaining format is accepted; otherwise scan
+        // the sentinel-terminated list for this format's data type
+        bool accepted = ( channelDataTypesToFilter == NULL );
+        for( const cl_channel_type *candidate = channelDataTypesToFilter; !accepted && *candidate != anyType; ++candidate )
+            accepted = ( *candidate == fmt.image_channel_data_type );
+
+        filterFlags[ idx ] = !accepted;
+        if( accepted )
+            keptCount++;
+    }
+
+    return keptCount;
+}
+
+/**
+ * Queries the image formats the global context supports for the given image type and
+ * memory flags, returning them in a freshly allocated array.
+ *
+ * @param device          unused by this function
+ * @param imageType       image object type to query
+ * @param outFormatList   receives an array allocated with new[] (NULL when no formats are
+ *                        reported or on failure); the caller releases it with delete[]
+ * @param outFormatCount  receives the number of supported formats
+ * @param flags           memory flags passed to clGetSupportedImageFormats
+ * @return 0 on success; on an OpenCL error the failure is reported through test_error
+ */
+int get_format_list( cl_device_id device, cl_mem_object_type imageType, cl_image_format * &outFormatList, unsigned int &outFormatCount, cl_mem_flags flags )
+{
+    int error;
+
+    outFormatList = NULL;
+
+    // Count-only query: no scratch buffer is needed to learn the number of formats
+    error = clGetSupportedImageFormats( context, flags,
+                                       imageType, 0, NULL, &outFormatCount );
+    test_error( error, "Unable to get count of supported image formats" );
+
+    // A zero count keeps the list NULL so the second call stays a valid (0, NULL) query
+    if( outFormatCount > 0 )
+        outFormatList = new cl_image_format[ outFormatCount ];
+
+    error = clGetSupportedImageFormats( context, flags,
+                                       imageType, outFormatCount, outFormatList, NULL );
+    if( error != CL_SUCCESS )
+    {
+        // Release the list before test_error reports the failure, so it is not leaked
+        delete [] outFormatList;
+        outFormatList = NULL;
+    }
+    test_error( error, "Unable to get list of supported image formats" );
+
+    return 0;
+}
+
+/**
+ * Runs the read test for one image format across the addressing modes
+ * CLAMP_TO_EDGE, CLAMP, REPEAT and MIRRORED_REPEAT, dispatching to the
+ * test_read_image_set_* function that matches imageType.
+ *
+ * imageSampler->addressing_mode is overwritten for every mode, including modes that
+ * are then skipped. Each mode that is run increments gTestCount, and a failing mode
+ * also increments gTestFailure.
+ *
+ * @return bitwise OR of the results of every mode that was run (0 when all passed)
+ */
+int test_read_image_type( cl_device_id device, cl_image_format *format, bool floatCoords, 
+                         image_sampler_data *imageSampler, ExplicitType outputType, cl_mem_object_type imageType )
+{
+    const cl_addressing_mode modesToTry[] = { CL_ADDRESS_CLAMP_TO_EDGE, CL_ADDRESS_CLAMP, CL_ADDRESS_REPEAT, CL_ADDRESS_MIRRORED_REPEAT };
+    const size_t modeCount = sizeof( modesToTry ) / sizeof( modesToTry[ 0 ] );
+    int combined = 0;
+
+    for( size_t m = 0; m < modeCount; m++ )
+    {
+        const cl_addressing_mode mode = modesToTry[ m ];
+        imageSampler->addressing_mode = mode;
+
+        // The repeating modes only make sense with normalized coordinates
+        const bool repeats = ( mode == CL_ADDRESS_REPEAT ) || ( mode == CL_ADDRESS_MIRRORED_REPEAT );
+        if( repeats && !( imageSampler->normalized_coords ) )
+            continue;
+
+        // Skip modes other than the one requested on the command line, if any
+        if( gAddressModeToUse != (cl_addressing_mode)-1 && imageSampler->addressing_mode != gAddressModeToUse )
+            continue;
+
+        print_read_header( format, imageSampler, false );
+
+        gTestCount++;
+
+        // An image type without a matching test function counts as a pass
+        int outcome = 0;
+        if( imageType == CL_MEM_OBJECT_IMAGE1D )
+            outcome = test_read_image_set_1D( device, format, imageSampler, floatCoords, outputType );
+        else if( imageType == CL_MEM_OBJECT_IMAGE1D_ARRAY )
+            outcome = test_read_image_set_1D_array( device, format, imageSampler, floatCoords, outputType );
+        else if( imageType == CL_MEM_OBJECT_IMAGE2D )
+            outcome = test_read_image_set_2D( device, format, imageSampler, floatCoords, outputType );
+        else if( imageType == CL_MEM_OBJECT_IMAGE2D_ARRAY )
+            outcome = test_read_image_set_2D_array( device, format, imageSampler, floatCoords, outputType );
+        else if( imageType == CL_MEM_OBJECT_IMAGE3D )
+            outcome = test_read_image_set_3D( device, format, imageSampler, floatCoords, outputType );
+
+        if( outcome != 0 )
+        {
+            gTestFailure++;
+            log_error( "FAILED: " );
+            print_read_header( format, imageSampler, true );
+            log_info( "\n" );
+        }
+        combined |= outcome;
+    }
+
+    return combined;
+}
+
+/**
+ * Runs the read tests for every unfiltered format over the valid combinations of
+ * normalized/unnormalized sampling and integer/float coordinates.
+ *
+ * Returns 0 immediately when a filter mode was selected globally (gFilterModeToUse)
+ * and the sampler uses a different one. imageSampler->normalized_coords is rewritten
+ * for each normalization setting.
+ *
+ * @return bitwise OR of all test_read_image_type results (0 when everything passed)
+ */
+int test_read_image_formats( cl_device_id device, cl_image_format *formatList, bool *filterFlags, unsigned int numFormats,
+                            image_sampler_data *imageSampler, ExplicitType outputType, cl_mem_object_type imageType )
+{
+    if( gFilterModeToUse != (cl_filter_mode)-1 && imageSampler->filter_mode != gFilterModeToUse )
+        return 0;
+
+    int combined = 0;
+
+    // Unnormalized first, then normalized
+    for( int normPass = 0; normPass < 2; normPass++ )
+    {
+        const bool useNormalized = ( normPass == 1 );
+        imageSampler->normalized_coords = useNormalized;
+
+        // 7 means "run both"; any other value selects a single normalization setting
+        if( gNormalizedModeToUse != 7 && gNormalizedModeToUse != (int)imageSampler->normalized_coords )
+            continue;
+
+        // Integer coordinates first, then float
+        for( int coordPass = 0; coordPass < 2; coordPass++ )
+        {
+            const bool useFloatCoords = ( coordPass == 1 );
+
+            // Integer coordinates are only valid with nearest filtering and
+            // unnormalized sampling (normalized integer coords would all be zero)
+            if( !useFloatCoords && ( imageSampler->filter_mode != CL_FILTER_NEAREST || useNormalized ) )
+                continue;
+
+            const char *coordLabel = "integer";
+            if( useFloatCoords )
+                coordLabel = imageSampler->normalized_coords ? "normalized float" : "unnormalized float";
+
+            log_info( "read_image (%s coords, %s results) *****************************\n", 
+                     coordLabel, get_explicit_type_name( outputType ) );
+
+            for( unsigned int f = 0; f < numFormats; f++ )
+            {
+                if( !filterFlags[ f ] )
+                    combined |= test_read_image_type( device, &formatList[ f ], useFloatCoords, imageSampler, outputType, imageType );
+            }
+        }
+    }
+
+    return combined;
+}
+
+
+/**
+ * Top-level driver for one image type: fetches the supported formats, prints them the
+ * first time a given (image type, read/write) combination is seen, then runs
+ * formatTestFn for the float, int and uint format groups enabled in gTypesToTest.
+ *
+ * 3D image write tests are skipped (returning 0) when the device does not report
+ * cl_khr_3d_image_writes.
+ *
+ * @param device        device under test
+ * @param formatTestFn  test_read_image_formats or a write-format test function; decides
+ *                      whether formats are queried as read-only or write-only
+ * @param imageType     image object type to test
+ * @return sum of the formatTestFn results, or -1 if the format list could not be set up
+ */
+int test_image_set( cl_device_id device, test_format_set_fn formatTestFn, cl_mem_object_type imageType )
+{
+    int ret = 0;
+    static int printedFormatList = -1;
+
+
+    if ( ( 0 == is_extension_available( device, "cl_khr_3d_image_writes" )) && (imageType == CL_MEM_OBJECT_IMAGE3D) && (formatTestFn == test_write_image_formats) )
+    {
+        log_info( "-----------------------------------------------------\n" );
+        log_info( "This device does not support cl_khr_3d_image_writes.\nSkipping 3D image write test. \n" );
+        log_info( "-----------------------------------------------------\n\n" );
+        return 0;
+    }
+
+    // Grab the list of supported image formats for the access direction being tested
+    cl_image_format *formatList;
+    bool *filterFlags;
+    unsigned int numFormats;
+
+    cl_mem_flags flags;
+    const char *flagNames;
+    if( formatTestFn == test_read_image_formats )
+    {
+        flags = CL_MEM_READ_ONLY;
+        flagNames = "read";
+    }
+    else
+    {
+        flags = CL_MEM_WRITE_ONLY;
+        flagNames = "write";
+    }
+
+    if( get_format_list( device, imageType, formatList, numFormats, flags ) )
+        return -1;
+
+    filterFlags = new bool[ numFormats ];
+    if( filterFlags == NULL )
+    {
+        log_error( "ERROR: Out of memory allocating filter flags list!\n" );
+        // Do not leak the format list on this error path
+        delete [] formatList;
+        return -1;
+    }
+    memset( filterFlags, 0, sizeof( bool ) * numFormats );
+
+    // First time through, we'll go ahead and print the formats supported, regardless of type.
+    // The key combines the image type with a read/write bit, so each combination prints once in a row.
+    int test = imageType | (formatTestFn == test_read_image_formats ? (1 << 16) : (1 << 17));
+    if( printedFormatList != test )
+    {
+        log_info( "---- Supported %s %s formats for this device ---- \n", convert_image_type_to_string(imageType), flagNames );
+        for( unsigned int f = 0; f < numFormats; f++ )
+        {
+            // Check the data type with the data type (the channel order was passed here before),
+            // matching the test applied in filter_formats
+            if ( IsChannelOrderSupported( formatList[ f ].image_channel_order ) && IsChannelTypeSupported( formatList[ f ].image_channel_data_type ) )
+                log_info( "  %-7s %-24s %d\n", GetChannelOrderName( formatList[ f ].image_channel_order ),
+                        GetChannelTypeName( formatList[ f ].image_channel_data_type ),
+                        (int)get_format_channel_count( &formatList[ f ] ) );
+        }
+        log_info( "------------------------------------------- \n" );
+        printedFormatList = test;
+    }
+
+    image_sampler_data imageSampler;
+
+    /////// float tests ///////
+
+    if( gTypesToTest & kTestFloat )
+    {
+        cl_channel_type floatFormats[] = { CL_UNORM_SHORT_565, CL_UNORM_SHORT_555, CL_UNORM_INT_101010, 
+#ifdef OBSOLETE_FORAMT
+            CL_UNORM_SHORT_565_REV, CL_UNORM_SHORT_555_REV, CL_UNORM_INT_8888, CL_UNORM_INT_8888_REV, CL_UNORM_INT_101010_REV,
+#endif
+#ifdef CL_SFIXED14_APPLE
+            CL_SFIXED14_APPLE,
+#endif
+            CL_UNORM_INT8, CL_SNORM_INT8, 
+            CL_UNORM_INT16, CL_SNORM_INT16, CL_FLOAT, CL_HALF_FLOAT, (cl_channel_type)-1 };
+        if( filter_formats( formatList, filterFlags, numFormats, floatFormats ) == 0 )
+        {
+            log_info( "No formats supported for float type\n" );
+        }
+        else
+        {
+            // Float reads are exercised with both filter modes
+            imageSampler.filter_mode = CL_FILTER_NEAREST;
+            ret += formatTestFn( device, formatList, filterFlags, numFormats, &imageSampler, kFloat, imageType );
+
+            imageSampler.filter_mode = CL_FILTER_LINEAR;
+            ret += formatTestFn( device, formatList, filterFlags, numFormats, &imageSampler, kFloat, imageType );
+        }
+    }
+
+    /////// int tests ///////
+    if( gTypesToTest & kTestInt )
+    {
+        cl_channel_type intFormats[] = { CL_SIGNED_INT8, CL_SIGNED_INT16, CL_SIGNED_INT32, (cl_channel_type)-1 };
+        if( filter_formats( formatList, filterFlags, numFormats, intFormats ) == 0 )
+        {
+            log_info( "No formats supported for integer type\n" );
+        }
+        else
+        {
+            // Only filter mode we support on int is nearest
+            imageSampler.filter_mode = CL_FILTER_NEAREST;
+            ret += formatTestFn( device, formatList, filterFlags, numFormats, &imageSampler, kInt, imageType );
+        }
+    }
+
+    /////// uint tests ///////
+
+    if( gTypesToTest & kTestUInt )
+    {
+        cl_channel_type uintFormats[] = { CL_UNSIGNED_INT8, CL_UNSIGNED_INT16, CL_UNSIGNED_INT32, (cl_channel_type)-1 };
+        if( filter_formats( formatList, filterFlags, numFormats, uintFormats ) == 0 )
+        {
+            log_info( "No formats supported for unsigned int type\n" );
+        }
+        else
+        {
+            // Only filter mode we support on uint is nearest
+            imageSampler.filter_mode = CL_FILTER_NEAREST;
+            ret += formatTestFn( device, formatList, filterFlags, numFormats, &imageSampler, kUInt, imageType );
+        }
+    }
+
+
+    // Both arrays were allocated with new[], so they must be released with delete[]
+    delete [] filterFlags;
+    delete [] formatList;
+
+    return ret;
+}
--- a/test_conformance/images/kernel_read_write/test_read_1D.cpp
+++ b/test_conformance/images/kernel_read_write/test_read_1D.cpp
@@ -0,0 +1,863 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../testBase.h"
+#include <float.h>
+
+#if defined( __APPLE__ )
+    #include <signal.h>
+    #include <sys/signal.h>
+    #include <setjmp.h>
+#endif
+
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+
+extern cl_command_queue queue;
+extern cl_context context;
+extern bool            gDebugTrace, gExtraValidateInfo, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestRounding;
+extern cl_device_type    gDeviceType;
+extern bool            gUseKernelSamplers;
+extern cl_filter_mode    gFilterModeToUse;
+extern cl_addressing_mode    gAddressModeToUse;
+extern uint64_t gRoundingStartValue;
+extern cl_mem_flags gMemFlagsToUse;
+
+#define MAX_TRIES               1
+#define MAX_CLAMPED             1
+
+const char *read1DKernelSourcePattern = // printf-style template (five %s slots) for the 1D image read kernel; the code that fills it is not in this part of the file
+"__kernel void sample_kernel( read_only image1d_t input,%s __global float *xOffsets, __global %s4 *results )\n" // slot 1: extra parameter text (presumably samplerKernelArg -- confirm); slot 2: element type of the 4-component results
+"{\n"
+"%s" // slot 3: text placed at the top of the kernel body (presumably an in-kernel imageSampler declaration when no sampler argument is passed -- confirm)
+"   int tidX = get_global_id(0);\n"
+"   int offset = tidX;\n"
+"%s" // slot 4: must declare `coord`, which the read below uses; both fragments defined after this template do so
+"   results[offset] = read_image%s( input, imageSampler, coord );\n" // slot 5: suffix appended to read_image to pick the built-in variant
+"}";
+
+const char *int1DCoordKernelSource = // kernel fragment: declares `coord` as an int initialised from xOffsets[offset]
+"   int coord = xOffsets[offset];\n";
+
+const char *float1DKernelSource = // kernel fragment: declares `coord` as a float initialised from xOffsets[offset]
+"   float coord = (float)xOffsets[offset];\n";
+
+static const char *samplerKernelArg = " sampler_t imageSampler,"; // kernel parameter text declaring a sampler_t named imageSampler (leading space and trailing comma included)
+
+// Absolute difference between two values, each converted to float first.
+// Both arguments are parenthesised inside the expansion so that an expression
+// argument (e.g. a + b) is converted and subtracted as a whole instead of
+// having the cast bind to its first operand only.
+#define ABS_ERROR( result, expected ) ( fabsf( (float)(expected) - (float)(result) ) )
+
+extern void read_image_pixel_float( void *imageData, image_descriptor *imageInfo, 
+                            int x, int y, int z, float *outData );
+/*
+ * Reports a mismatch between the value the kernel read and the reference
+ * value for one sample of a 1D image, and tells the caller whether the test
+ * should stop.
+ *
+ *   imagePtr, imageInfo    source image data and its descriptor
+ *   imageSampler           sampler the kernel read with
+ *   resultPtr, expected    four-channel actual and reference values
+ *   error                  error figure printed in the float diagnostics
+ *   x, xAddressOffset      sample coordinate and the address offset applied to it
+ *   j                      sample index, used only in messages
+ *   numTries, numClamped   remaining failure budgets, updated in place
+ *   printAsFloat           print channels with %g (true) or %x (false)
+ *
+ * Returns 1 as soon as one of the two budgets reaches zero, 0 otherwise.
+ *
+ * The image width and row pitch are cast to long before being handed to the
+ * %ld conversions so the argument type always matches the format, whatever
+ * the width of the descriptor's own field type.
+ */
+template <class T> int determine_validation_error_1D( void *imagePtr, image_descriptor *imageInfo, image_sampler_data *imageSampler,
+                                                T *resultPtr, T * expected, float error,
+                                float x, float xAddressOffset, size_t j, int &numTries, int &numClamped, bool printAsFloat )
+{
+    int actualX, actualY;
+    // Look for the returned value anywhere in the image; a result above 1 is reported below as "NOT unique".
+    int found = debug_find_pixel_in_image( imagePtr, imageInfo, resultPtr, &actualX, &actualY, NULL );
+    bool clampingErr = false, clamped = false, otherClampingBug = false;
+    int clampedX, ignoreMe;
+
+    // Integer coordinate the sampler resolves to, and whether the helper reports it as clamped.
+    clamped = get_integer_coords_offset( x, 0.0f, 0.0f, xAddressOffset, 0.0f, 0.0f, imageInfo->width, 0, 0, imageSampler, imageInfo, clampedX, ignoreMe, ignoreMe );
+
+    if( found )
+    {
+        // Is it a clamping bug?
+        if( clamped && clampedX == actualX )
+        {
+            if( (--numClamped) == 0 )
+            {
+                log_error( "ERROR: TEST FAILED: Read is erroneously clamping coordinates for image size %ld!\n", (long)imageInfo->width );
+                if( printAsFloat )
+                {
+                    log_error( "Sample %d: coord {%f(%a)} did not validate!\n\tExpected (%g,%g,%g,%g),\n\tgot      (%g,%g,%g,%g),\n\terror of %g\n",
+                              (int)j, x, x, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ],
+                              (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error );
+                }
+                else
+                {
+                    log_error( "Sample %d: coord {%f(%a)} did not validate!\n\tExpected (%x,%x,%x,%x),\n\tgot      (%x,%x,%x,%x)\n",
+                              (int)j, x, x, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ],
+                              (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] );
+                }
+                return 1;
+            }
+            clampingErr = true;
+            otherClampingBug = true;
+        }
+    }
+    if( clamped && !otherClampingBug )
+    {
+        // If we are in clamp-to-edge mode and we're getting zeroes, it's possible we're getting border erroneously
+        if( resultPtr[ 0 ] == 0 && resultPtr[ 1 ] == 0 && resultPtr[ 2 ] == 0 && resultPtr[ 3 ] == 0 )
+        {
+            if( (--numClamped) == 0 )
+            {
+                log_error( "ERROR: TEST FAILED: Clamping is erroneously returning border color for image size %ld!\n", (long)imageInfo->width );
+                if( printAsFloat )
+                {
+                    log_error( "Sample %d: coord {%f(%a)} did not validate!\n\tExpected (%g,%g,%g,%g),\n\tgot      (%g,%g,%g,%g),\n\terror of %g\n",
+                              (int)j, x, x, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ],
+                              (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error );
+                }
+                else
+                {
+                    log_error( "Sample %d: coord {%f(%a)} did not validate!\n\tExpected (%x,%x,%x,%x),\n\tgot      (%x,%x,%x,%x)\n",
+                              (int)j, x, x, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ],
+                              (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] );
+                }
+                return 1;
+            }
+            clampingErr = true;
+        }
+    }
+    if( !clampingErr )
+    {
+        // Not attributable to clamping: print the generic mismatch report.
+        if( printAsFloat )
+        {
+            log_error( "Sample %d: coord {%f(%a)} did not validate!\n\tExpected (%g,%g,%g,%g),\n\tgot      (%g,%g,%g,%g), error of %g\n",
+                      (int)j, x, x, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ],
+                      (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error );
+        }
+        else
+        {
+            log_error( "Sample %d: coord {%f(%a)} did not validate!\n\tExpected (%x,%x,%x,%x),\n\tgot      (%x,%x,%x,%x)\n",
+                      (int)j, x, x, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ],
+                                (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] );
+        }
+        log_error( "img size %ld (pitch %ld)", (long)imageInfo->width, (long)imageInfo->rowPitch );
+        if( clamped )
+        {
+            log_error( " which would clamp to %d\n", clampedX );
+        }
+        if( printAsFloat && gExtraValidateInfo)
+        {
+            // Dump the four pixels from clampedX - 2 through clampedX + 1 for context.
+            log_error( "Nearby values:\n" );
+            log_error( "\t%d\t%d\t%d\t%d\n", clampedX - 2, clampedX - 1, clampedX, clampedX + 1 );
+            {
+                float top[ 4 ], real[ 4 ], bot[ 4 ], bot2[ 4 ];
+                read_image_pixel_float( imagePtr, imageInfo, clampedX - 2, 0, 0, top );
+                read_image_pixel_float( imagePtr, imageInfo, clampedX - 1, 0, 0, real );
+                read_image_pixel_float( imagePtr, imageInfo, clampedX, 0, 0, bot );
+                read_image_pixel_float( imagePtr, imageInfo, clampedX + 1, 0, 0, bot2 );
+                log_error( "\t(%g,%g,%g,%g)",top[0], top[1], top[2], top[3] );
+                log_error( " (%g,%g,%g,%g)", real[0], real[1], real[2], real[3] );
+                log_error( " (%g,%g,%g,%g)",bot[0], bot[1], bot[2], bot[3] );
+                log_error( " (%g,%g,%g,%g)\n",bot2[0], bot2[1], bot2[2], bot2[3] );
+            }
+        }
+
+        // The "found in image" hint is only printed for non-linear filtering.
+        if( imageSampler->filter_mode != CL_FILTER_LINEAR )
+        {
+            if( found )
+                log_error( "\tValue really found in image at %d (%s)\n", actualX, ( found > 1 ) ? "NOT unique!!" : "unique" );
+            else
+                log_error( "\tValue not actually found in image\n" );
+        }
+        log_error( "\n" );
+
+        numClamped = -1; // We force the clamped counter to never work
+        if( ( --numTries ) == 0 )
+        {
+            return 1;
+        }
+    }
+    return 0;
+}
+
+// Limits _val to the closed range [_min, _max]; arguments may be evaluated more than once.
+#define CLAMP( _val, _min, _max )           ((_val) < (_min) ? (_min) : (_val) > (_max) ? (_max) : (_val))
+
+/*
+ * Fills xOffsets[ 0 .. width-1 ] with the coordinate to sample for each pixel.
+ *
+ * Each entry starts as the pixel index plus xfract; unless gDisableOffsets is
+ * set, a random integer from random_in_range( -10, 10, d ) is added to the
+ * index first.  With CL_ADDRESS_NONE the value is then clamped to
+ * [0, width - 1], and when normalized_coords is non-zero it is finally
+ * divided by the image width.  Every step is computed in double and rounded
+ * back to float before the next one.
+ */
+static void InitFloatCoords( image_descriptor *imageInfo, image_sampler_data *imageSampler, float *xOffsets, float xfract, int normalized_coords, MTdata d )
+{
+    const size_t pixelCount = imageInfo->width;
+    const bool keepInsideImage = ( imageSampler->addressing_mode == CL_ADDRESS_NONE );
+
+    for( size_t px = 0; px < pixelCount; ++px )
+    {
+        // Base position: the plain index, or the index shifted by a random amount.
+        double base;
+        if( gDisableOffsets )
+            base = (double) px;
+        else
+            base = (double) ((int) px + random_in_range( -10, 10, d ));
+
+        float coord = (float) (xfract + base);
+
+        if( keepInsideImage )
+            coord = (float) CLAMP( (double) coord, 0.0, (double) pixelCount - 1.0);
+
+        if( normalized_coords )
+            coord = (float) ((double) coord / (double) pixelCount);
+
+        xOffsets[ px ] = coord;
+    }
+}
+
+#ifndef MAX // fallback used only when MAX is not already defined; yields the larger argument, and the selected argument is evaluated twice
+    #define MAX( _a, _b )           ((_a) > (_b) ? (_a) : (_b))
+#endif // MAX
+
+
+int test_read_image_1D( cl_device_id device, cl_context context, cl_command_queue queue, cl_kernel kernel,
+                        image_descriptor *imageInfo, image_sampler_data *imageSampler, 
+                       bool useFloatCoords, ExplicitType outputType, MTdata d )
+{
+    int error;
+    static int initHalf = 0;
+
+    size_t threads[2];
+    
+    clMemWrapper xOffsets, results;
+    clSamplerWrapper actualSampler;
+    BufferOwningPtr<char> maxImageUseHostPtrBackingStore;
+
+    // The DataBuffer template class really does use delete[], not free -- IRO
+    BufferOwningPtr<cl_float> xOffsetValues(malloc(sizeof(cl_float) * imageInfo->width));
+
+    // generate_random_image_data allocates with malloc, so we use a MallocDataBuffer here
+    BufferOwningPtr<char> imageValues;
+    generate_random_image_data( imageInfo, imageValues, d );
+    
+    if( gDebugTrace )
+        log_info( " - Creating 1D image %d ...\n", (int)imageInfo->width );
+  
+    // Construct testing sources
+    clProtectedImage protImage;
+    clMemWrapper unprotImage;
+    cl_mem image;
+    
+    if( gMemFlagsToUse == CL_MEM_USE_HOST_PTR )
+    {
+        // clProtectedImage uses USE_HOST_PTR, so just rely on that for the testing (via Ian)
+        // Do not use protected images for max image size test since it rounds the row size to a page size
+        if (gTestMaxImages) {
+          generate_random_image_data( imageInfo, maxImageUseHostPtrBackingStore, d );
+          unprotImage = create_image_1d( context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, imageInfo->format, 
+                                        imageInfo->width, ( gEnablePitch ? imageInfo->rowPitch : 0 ),
+                                        maxImageUseHostPtrBackingStore, NULL, &error );
+        } else {
+                error = protImage.Create( context, (cl_mem_flags)(CL_MEM_READ_ONLY), imageInfo->format, imageInfo->width );
+        }
+        if( error != CL_SUCCESS )
+        {
+            log_error( "ERROR: Unable to create 1D image of size %d pitch %d (%s)\n", (int)imageInfo->width, (int)imageInfo->rowPitch, IGetErrorString( error ) );
+            return error;
+        }
+        
+        if (gTestMaxImages)
+            image = (cl_mem)unprotImage;
+        else
+            image = (cl_mem)protImage;
+    }
+    else if( gMemFlagsToUse == CL_MEM_COPY_HOST_PTR )
+    {
+        // Don't use clEnqueueWriteImage; just use copy host ptr to get the data in
+        unprotImage = create_image_1d( context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, imageInfo->format, 
+                                      imageInfo->width, ( gEnablePitch ? imageInfo->rowPitch : 0 ),
+                                      imageValues, NULL, &error );
+        if( error != CL_SUCCESS )
+        {
+            log_error( "ERROR: Unable to create 1D image of size %d pitch %d (%s)\n", (int)imageInfo->width, (int)imageInfo->rowPitch, IGetErrorString( error ) );
+            return error;
+        }
+        image = unprotImage;
+    }
+    else // Either CL_MEM_ALLOC_HOST_PTR or none
+    {
+        // Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can be accessed by the host, but otherwise
+        // it works just as if no flag is specified, so we just do the same thing either way
+        unprotImage = create_image_1d( context, CL_MEM_READ_ONLY | gMemFlagsToUse, imageInfo->format, 
+                                      imageInfo->width, ( gEnablePitch ? imageInfo->rowPitch : 0 ),
+                                      imageValues, NULL, &error );
+        if( error != CL_SUCCESS )
+        {
+            log_error( "ERROR: Unable to create 1D image of size %d pitch %d (%s)\n", (int)imageInfo->width, (int)imageInfo->rowPitch, IGetErrorString( error ) );
+            return error;
+        }
+        image = unprotImage;
+    }
+  
+    if( gMemFlagsToUse != CL_MEM_COPY_HOST_PTR )
+    {
+        if( gDebugTrace )
+            log_info( " - Writing image...\n" );
+        
+        size_t origin[ 3 ] = { 0, 0, 0 };
+        size_t region[ 3 ] = { imageInfo->width, 1, 1 };
+        
+        error = clEnqueueWriteImage(queue, image, CL_TRUE,
+                                    origin, region, ( gEnablePitch ? imageInfo->rowPitch : 0 ), 0,
+                                   imageValues, 0, NULL, NULL);
+        if (error != CL_SUCCESS) 
+        {
+            log_error( "ERROR: Unable to write to 1D image of size %d\n", (int)imageInfo->width );
+            return error;
+        }
+    }
+
+    if( gDebugTrace )
+        log_info( " - Creating kernel arguments...\n" );
+    
+    xOffsets = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), sizeof( cl_float ) * imageInfo->width, xOffsetValues, &error );
+    test_error( error, "Unable to create x offset buffer" );
+    results = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  get_explicit_type_size( outputType ) * 4 * imageInfo->width, NULL, &error );
+    test_error( error, "Unable to create result buffer" );
+    
+    // Create sampler to use
+    actualSampler = clCreateSampler( context, (cl_bool)imageSampler->normalized_coords, imageSampler->addressing_mode, imageSampler->filter_mode, &error );
+    test_error( error, "Unable to create image sampler" );
+    
+    // Set arguments
+    int idx = 0;
+    error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &image );
+    test_error( error, "Unable to set kernel arguments" );
+    if( !gUseKernelSamplers )
+    {
+        error = clSetKernelArg( kernel, idx++, sizeof( cl_sampler ), &actualSampler );
+        test_error( error, "Unable to set kernel arguments" );
+    }
+    error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &xOffsets );
+    test_error( error, "Unable to set kernel arguments" );
+    error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &results );
+    test_error( error, "Unable to set kernel arguments" );
+    
+    // A cast of troublesome offsets. The first one has to be zero.
+    const float float_offsets[] = { 0.0f, MAKE_HEX_FLOAT(0x1.0p-30f, 0x1L, -30), 0.25f, 0.3f, 0.5f - FLT_EPSILON/4.0f, 0.5f, 0.9f, 1.0f - FLT_EPSILON/2 };
+    int float_offset_count = sizeof( float_offsets) / sizeof( float_offsets[0] );
+    int numTries = MAX_TRIES, numClamped = MAX_CLAMPED;
+    int loopCount = 2 * float_offset_count;
+    if( ! useFloatCoords )
+        loopCount = 1;
+    if (gTestMaxImages) {
+        loopCount = 1;
+      log_info("Testing each size only once with pixel offsets of %g for max sized images.\n", float_offsets[0]);
+    }
+  
+	// Get the maximum absolute error for this format
+  	double formatAbsoluteError = get_max_absolute_error(imageInfo->format, imageSampler); 
+  	if (gDebugTrace) log_info("\tformatAbsoluteError is %e\n", formatAbsoluteError);
+
+    if (0 == initHalf && imageInfo->format->image_channel_data_type == CL_HALF_FLOAT ) {
+        initHalf = CL_SUCCESS == DetectFloatToHalfRoundingMode( queue );
+        if (initHalf) {
+            log_info("Half rounding mode successfully detected.\n");
+        }
+    }
+
+    for( int q = 0; q < loopCount; q++ )
+    {
+        float offset = float_offsets[ q % float_offset_count ];
+
+        // Init the coordinates
+        InitFloatCoords( imageInfo, imageSampler, xOffsetValues, 
+                            q>=float_offset_count ? -offset: offset, 
+                            imageSampler->normalized_coords, d );
+
+        error = clEnqueueWriteBuffer( queue, xOffsets, CL_TRUE, 0, sizeof(cl_float) * imageInfo->width, xOffsetValues, 0, NULL, NULL );
+        test_error( error, "Unable to write x offsets" );
+        
+        // Get results
+        size_t resultValuesSize = imageInfo->width * get_explicit_type_size( outputType ) * 4;
+        BufferOwningPtr<char> resultValues(malloc(resultValuesSize));
+        memset( resultValues, 0xff, resultValuesSize );
+        clEnqueueWriteBuffer( queue, results, CL_TRUE, 0, resultValuesSize, resultValues, 0, NULL, NULL );
+
+        // Run the kernel
+        threads[0] = (size_t)imageInfo->width;
+        error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
+        test_error( error, "Unable to run kernel" );
+        
+        if( gDebugTrace )
+            log_info( "    reading results, %ld kbytes\n", (unsigned long)( imageInfo->width * get_explicit_type_size( outputType ) * 4 / 1024 ) );
+     
+        error = clEnqueueReadBuffer( queue, results, CL_TRUE, 0, imageInfo->width * get_explicit_type_size( outputType ) * 4, resultValues, 0, NULL, NULL );
+        test_error( error, "Unable to read results from kernel" );
+        if( gDebugTrace )
+            log_info( "    results read\n" );
+          
+        // Validate results element by element
+        char *imagePtr = imageValues;
+                /*
+                 * FLOAT output type
+                 */
+        if( outputType == kFloat )
+        {
+            // Validate float results
+            float *resultPtr = (float *)(char *)resultValues;
+            float expected[4], error=0.0f;
+            float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 0 /*not 3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode );
+            {
+                for( size_t x = 0, j = 0; x < imageInfo->width; x++, j++ )
+                {
+                    // Step 1: go through and see if the results verify for the pixel
+                    // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
+                    // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
+                    int checkOnlyOnePixel = 0;
+                    int found_pixel = 0;
+                    float offset = NORM_OFFSET;
+                    if (!imageSampler->normalized_coords || imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0
+#if defined( __APPLE__ )
+                        // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes
+                        || gDeviceType != CL_DEVICE_TYPE_GPU 
+#endif
+                    ) 
+                        offset = 0.0f;          // Loop only once
+                        
+                    for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel; norm_offset_x += NORM_OFFSET) {                            
+                            
+                            // Try sampling the pixel, without flushing denormals.
+                            int containsDenormals = 0;
+                            FloatPixel maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo, 
+                                                                            xOffsetValues[ j ], 0.0f, 0.0f, norm_offset_x, 0.0f, 0.0f,
+                                                                            imageSampler, expected, 0, &containsDenormals );
+                            
+                            float err1 = fabsf( resultPtr[0] - expected[0] );
+                            float err2 = fabsf( resultPtr[1] - expected[1] );
+                            float err3 = fabsf( resultPtr[2] - expected[2] );
+                            float err4 = fabsf( resultPtr[3] - expected[3] );
+                            // Clamp to the minimum absolute error for the format
+                            if (err1 > 0 && err1 < formatAbsoluteError) { err1 = 0.0f; }
+                            if (err2 > 0 && err2 < formatAbsoluteError) { err2 = 0.0f; }
+                            if (err3 > 0 && err3 < formatAbsoluteError) { err3 = 0.0f; }
+                            if (err4 > 0 && err4 < formatAbsoluteError) { err4 = 0.0f; }
+                            float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN );
+                            float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN );
+                            float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN );
+                            float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN );
+                            
+                            // Check if the result matches.
+                            if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2)    ||
+                                ! (err3 <= maxErr3) || ! (err4 <= maxErr4)    )
+                            {
+                                //try flushing the denormals, if there is a failure.
+                                if( containsDenormals )
+                                {
+                                   // If implementation decide to flush subnormals to zero, 
+                                   // max error needs to be adjusted
+                                    maxErr1 += 4 * FLT_MIN;
+                                    maxErr2 += 4 * FLT_MIN;
+                                    maxErr3 += 4 * FLT_MIN;
+                                    maxErr4 += 4 * FLT_MIN;
+                                    
+                                    maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo, 
+                                                                                 xOffsetValues[ j ], 0.0f, 0.0f, norm_offset_x, 0.0f, 0.0f,
+                                                                                 imageSampler, expected, 0, NULL );
+                                    
+                                    err1 = fabsf( resultPtr[0] - expected[0] );
+                                    err2 = fabsf( resultPtr[1] - expected[1] );
+                                    err3 = fabsf( resultPtr[2] - expected[2] );
+                                    err4 = fabsf( resultPtr[3] - expected[3] );                    
+                                }
+                            }
+                            
+                            // If the final result DOES match, then we've found a valid result and we're done with this pixel.
+                            found_pixel = (err1 <= maxErr1) && (err2 <= maxErr2)  && (err3 <= maxErr3) && (err4 <= maxErr4);                             
+                    }//norm_offset_x
+                    
+                    
+                    // Step 2: If we did not find a match, then print out debugging info.
+                    if (!found_pixel) {
+                        // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
+                        // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
+                        checkOnlyOnePixel = 0;
+                        int shouldReturn = 0;
+                        for (float norm_offset_x = -offset; norm_offset_x <= offset && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
+                                
+                                // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
+                                // E.g., test one pixel.
+                                if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
+                                    norm_offset_x = 0.0f;
+                                    checkOnlyOnePixel = 1;
+                                }
+                                
+                                int containsDenormals = 0;
+                                FloatPixel maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo, 
+                                                                                        xOffsetValues[ j ], 0.0f, 0.0f, norm_offset_x, 0.0f, 0.0f,
+                                                                                        imageSampler, expected, 0, &containsDenormals );
+                                
+                                float err1 = fabsf( resultPtr[0] - expected[0] );
+                                float err2 = fabsf( resultPtr[1] - expected[1] );
+                                float err3 = fabsf( resultPtr[2] - expected[2] );
+                                float err4 = fabsf( resultPtr[3] - expected[3] );
+                                float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN );
+                                float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN );
+                                float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN );
+                                float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN );
+                                
+                                
+                                if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2)    ||
+                                    ! (err3 <= maxErr3) || ! (err4 <= maxErr4)    )
+                                {
+                                    //try flushing the denormals, if there is a failure.
+                                    if( containsDenormals )
+                                    {
+                                        maxErr1 += 4 * FLT_MIN;
+                                        maxErr2 += 4 * FLT_MIN;
+                                        maxErr3 += 4 * FLT_MIN;
+                                        maxErr4 += 4 * FLT_MIN;                                    
+                                    
+                                        maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo, 
+                                                                                     xOffsetValues[ j ], 0.0f, 0.0f, norm_offset_x, 0.0f, 0.0f,
+                                                                                     imageSampler, expected, 0, NULL );
+                                        
+                                        err1 = fabsf( resultPtr[0] - expected[0] );
+                                        err2 = fabsf( resultPtr[1] - expected[1] );
+                                        err3 = fabsf( resultPtr[2] - expected[2] );
+                                        err4 = fabsf( resultPtr[3] - expected[3] );                    
+                                    }
+                                }
+                                if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2)    ||
+                                    ! (err3 <= maxErr3) || ! (err4 <= maxErr4)    )
+                                {                       
+                                    log_error("FAILED norm_offsets: %g:\n", norm_offset_x);
+                                    
+                                    float tempOut[4];
+                                    shouldReturn |= determine_validation_error_1D<float>( imagePtr, imageInfo, imageSampler, resultPtr,
+                                                                                        expected, error, xOffsetValues[ j ], norm_offset_x, j, numTries, numClamped, true );
+                                    
+                                    log_error( "Step by step:\n" );
+                                    FloatPixel temp = sample_image_pixel_float_offset( imageValues, imageInfo, 
+                                                                                        xOffsetValues[ j ], 0.0f, 0.0f, norm_offset_x, 0.0f, 0.0f,
+                                                                                        imageSampler, tempOut, 1 /* verbose */, &containsDenormals /*dont flush while error reporting*/ );
+                                    log_error( "\tulps: %2.2f, %2.2f, %2.2f, %2.2f  (max allowed: %2.2f)\n\n",    
+                                                        Ulp_Error( resultPtr[0], expected[0] ),
+                                                        Ulp_Error( resultPtr[1], expected[1] ),
+                                                        Ulp_Error( resultPtr[2], expected[2] ),
+                                                        Ulp_Error( resultPtr[3], expected[3] ),
+                                                        Ulp_Error( MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) + maxErr, MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) ) );
+                                    
+                                } else {
+                                    log_error("Test error: we should have detected this passing above.\n");
+                                }
+                                
+                        }//norm_offset_x
+                        if( shouldReturn )
+                            return 1;
+                    } // if (!found_pixel)
+                    
+                    resultPtr += 4;
+                }
+            }
+        }
+            /*
+             * UINT output type
+             */            
+        else if( outputType == kUInt )
+        {
+            // Validate unsigned integer results
+            unsigned int *resultPtr = (unsigned int *)(char *)resultValues;
+            unsigned int expected[4];
+            float error;
+            for( size_t x = 0, j = 0; x < imageInfo->width; x++, j++ )
+            {
+                    // Step 1: go through and see if the results verify for the pixel
+                    // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
+                    // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
+                    int checkOnlyOnePixel = 0;
+                    int found_pixel = 0;
+                    for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
+                            
+                            // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
+                            // E.g., test one pixel.
+                            if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
+                                norm_offset_x = 0.0f;
+                                checkOnlyOnePixel = 1;
+                            }
+                            
+                            sample_image_pixel_offset<unsigned int>( imageValues, imageInfo, 
+                                                                                             xOffsetValues[ j ], 0.0f, 0.0f, norm_offset_x, 0.0f, 0.0f, 
+                                                                                             imageSampler, expected );
+                            
+                            
+                            error = errMax( errMax( abs_diff_uint(expected[ 0 ], resultPtr[ 0 ]), abs_diff_uint(expected[ 1 ], resultPtr[ 1 ]) ),
+                                                         errMax( abs_diff_uint(expected[ 2 ], resultPtr[ 2 ]), abs_diff_uint(expected[ 3 ], resultPtr[ 3 ]) ) );
+                            
+                            if (error <= MAX_ERR) 
+                                found_pixel = 1;
+                    }//norm_offset_x
+                    
+                    // Step 2: If we did not find a match, then print out debugging info.
+                    if (!found_pixel) {
+                        // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
+                        // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
+                        checkOnlyOnePixel = 0;
+                        int shouldReturn = 0;
+                        for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
+                                
+                                // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
+                                // E.g., test one pixel.
+                                if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
+                                    norm_offset_x = 0.0f;
+                                    checkOnlyOnePixel = 1;
+                                }
+                                
+                                sample_image_pixel_offset<unsigned int>( imageValues, imageInfo, 
+                                                                                                 xOffsetValues[ j ], 0.0f, 0.0f, norm_offset_x, 0.0f, 0.0f, 
+                                                                                                 imageSampler, expected );
+                                
+                                
+                                error = errMax( errMax( abs_diff_uint(expected[ 0 ], resultPtr[ 0 ]), abs_diff_uint(expected[ 1 ], resultPtr[ 1 ]) ),
+                                                             errMax( abs_diff_uint(expected[ 2 ], resultPtr[ 2 ]), abs_diff_uint(expected[ 3 ], resultPtr[ 3 ]) ) );
+
+                                if( error > MAX_ERR )
+                                {
+                                    log_error("FAILED norm_offsets: %g:\n", norm_offset_x);
+
+                                    shouldReturn |= determine_validation_error_1D<unsigned int>( imagePtr, imageInfo, imageSampler, resultPtr,
+                                                                                                                     expected, error, xOffsetValues[j], norm_offset_x, j, numTries, numClamped, false );
+                                } else {
+                                    log_error("Test error: we should have detected this passing above.\n");
+                                }
+                        }//norm_offset_x
+                        if( shouldReturn )
+                            return 1;
+                    } // if (!found_pixel)
+                    
+                    resultPtr += 4;
+            }
+        }
+            /*
+             * INT output type
+             */                        
+        else
+        {
+            // Validate integer results
+            int *resultPtr = (int *)(char *)resultValues;
+            int expected[4];
+            float error;
+            for( size_t x = 0, j = 0; x < imageInfo->width; x++, j++ )
+            {
+                    // Step 1: go through and see if the results verify for the pixel
+                    // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
+                    // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
+                    int checkOnlyOnePixel = 0;
+                    int found_pixel = 0;
+                    for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
+                            
+                            // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
+                            // E.g., test one pixel.
+                            if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
+                                norm_offset_x = 0.0f;
+                                checkOnlyOnePixel = 1;
+                            }
+                            
+                            sample_image_pixel_offset<int>( imageValues, imageInfo, 
+                                                            xOffsetValues[ j ], 0.0f, 0.0f, norm_offset_x, 0.0f, 0.0f, 
+                                                            imageSampler, expected );
+                            
+                            
+                            error = errMax( errMax( abs_diff_int(expected[ 0 ], resultPtr[ 0 ]), abs_diff_int(expected[ 1 ], resultPtr[ 1 ]) ),
+                                                         errMax( abs_diff_int(expected[ 2 ], resultPtr[ 2 ]), abs_diff_int(expected[ 3 ], resultPtr[ 3 ]) ) );
+                            
+                            if (error <= MAX_ERR) 
+                                found_pixel = 1;
+                    }//norm_offset_x
+                    
+                    // Step 2: If we did not find a match, then print out debugging info.
+                    if (!found_pixel) {
+                        // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
+                        // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
+                        checkOnlyOnePixel = 0;
+                        int shouldReturn = 0;
+                        for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
+                                
+                                // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
+                                // E.g., test one pixel.
+                                if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
+                                    norm_offset_x = 0.0f;
+                                    checkOnlyOnePixel = 1;
+                                }
+                                
+                                sample_image_pixel_offset<int>( imageValues, imageInfo, 
+                                                                xOffsetValues[ j ], 0.0f, 0.0f, norm_offset_x, 0.0f, 0.0f, 
+                                                                imageSampler, expected );
+                                
+                                
+                                error = errMax( errMax( abs_diff_int(expected[ 0 ], resultPtr[ 0 ]), abs_diff_int(expected[ 1 ], resultPtr[ 1 ]) ),
+                                                             errMax( abs_diff_int(expected[ 2 ], resultPtr[ 2 ]), abs_diff_int(expected[ 3 ], resultPtr[ 3 ]) ) );
+                                
+                                if( error > MAX_ERR )
+                                {
+                                    log_error("FAILED norm_offsets: %g:\n", norm_offset_x);
+                                    
+                                    shouldReturn |= determine_validation_error_1D<int>( imagePtr, imageInfo, imageSampler, resultPtr,
+                                                                                     expected, error, xOffsetValues[j], norm_offset_x, j, numTries, numClamped, false );
+                                } else {
+                                    log_error("Test error: we should have detected this passing above.\n");
+                                }
+                        }//norm_offset_x
+                        if( shouldReturn )
+                            return 1;
+                    } // if (!found_pixel)
+                    
+                    resultPtr += 4;
+            }
+        }    
+    }
+  
+    return numTries != MAX_TRIES || numClamped != MAX_CLAMPED;
+}
+
+// Builds the 1D image read kernel for one format / sampler / coordinate type / output type
+// combination and runs test_read_image_1D on it over a series of image widths.
+//
+// The widths come from the first global mode flag that is set:
+//   gTestSmallImages - every width from 1 through 12
+//   gTestMaxImages   - the widths reported by get_max_sizes
+//   gTestRounding    - one fixed width, re-run while gRoundingStartValue advances up to typeRange
+//   otherwise        - NUM_IMAGE_ITERATIONS random widths (row pitch padded when gEnablePitch is set)
+//
+// Returns 0 if every run passed, otherwise the first non-zero result of test_read_image_1D.
+// NOTE(review): test_error is assumed to return from this function on a non-zero error code -- confirm.
+int test_read_image_set_1D( cl_device_id device, cl_image_format *format, image_sampler_data *imageSampler, 
+                        bool floatCoords, ExplicitType outputType )
+{
+    char programSrc[10240];
+    const char *ptr;
+    const char *readFormat;
+    clProgramWrapper program;
+    clKernelWrapper kernel;
+    RandomSeed seed( gRandomSeed );
+    int error;
+
+    // Get our operating params
+    size_t maxWidth;
+    cl_ulong maxAllocSize, memSize;
+    image_descriptor imageInfo = { 0x0 };
+    size_t pixelSize;
+
+    imageInfo.format = format;
+    imageInfo.height = 1;
+    imageInfo.depth = imageInfo.arraySize = imageInfo.slicePitch = 0;
+    imageInfo.type = CL_MEM_OBJECT_IMAGE1D;
+    pixelSize = get_pixel_size( imageInfo.format );
+
+    // OpenCL 1.2 has no dedicated 1D width query; CL_DEVICE_IMAGE2D_MAX_WIDTH also bounds 1D images.
+    error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL );
+    error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL );
+    error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
+    test_error( error, "Unable to get max image 2D size from device" );
+
+    // Determine types: suffix appended to "read_image" in the kernel source
+    if( outputType == kInt )
+        readFormat = "i";
+    else if( outputType == kUInt )
+        readFormat = "ui";
+    else // kFloat
+        readFormat = "f";
+
+    // Construct the source. With kernel-declared samplers the sampler is emitted inside the
+    // kernel body (samplerVar) and the sampler kernel argument is dropped.
+    const char *samplerArg = samplerKernelArg;
+    char samplerVar[ 1024 ] = "";
+    if( gUseKernelSamplers )
+    {
+        get_sampler_kernel_code( imageSampler, samplerVar );
+        samplerArg = "";
+    }
+
+    sprintf( programSrc, read1DKernelSourcePattern, samplerArg, get_explicit_type_name( outputType ),
+            samplerVar,
+            floatCoords ? float1DKernelSource : int1DCoordKernelSource,
+            readFormat );
+
+    ptr = programSrc;
+    error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "sample_kernel" );
+    test_error( error, "Unable to create testing kernel" );
+
+    if( gTestSmallImages )
+    {
+        for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
+        {
+            imageInfo.rowPitch = imageInfo.width * pixelSize;
+            if( gDebugTrace )
+                log_info( "   at size %d\n", (int)imageInfo.width );
+
+            int retCode = test_read_image_1D( device, context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
+            if( retCode )
+                return retCode;
+        }
+    }
+    else if( gTestMaxImages )
+    {
+        // Try a specific set of maximum sizes
+        size_t numberOfSizes;
+        size_t sizes[100][3];
+
+        get_max_sizes(&numberOfSizes, 100, sizes, maxWidth, 1, 1, 1, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE1D, imageInfo.format);
+
+        for( size_t idx = 0; idx < numberOfSizes; idx++ )
+        {
+            imageInfo.width = sizes[ idx ][ 0 ];
+            imageInfo.rowPitch = imageInfo.width * pixelSize;
+            log_info("Testing %d\n", (int)sizes[ idx ][ 0 ]);
+            if( gDebugTrace )
+                log_info( "   at max size %d\n", (int)sizes[ idx ][ 0 ] );
+            int retCode = test_read_image_1D( device, context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
+            if( retCode )
+                return retCode;
+        }
+    }
+    else if( gTestRounding )
+    {
+        // typeRange: 2^(bits per channel) divided by the number of channels per pixel
+        uint64_t typeRange = 1LL << ( get_format_type_size( imageInfo.format ) * 8 );
+        typeRange /= get_pixel_size( imageInfo.format ) / get_format_type_size( imageInfo.format );
+        imageInfo.width = (size_t)( ( typeRange + 255LL ) / 256LL );
+
+        // Halve the width until it is below half of the device limit
+        while( imageInfo.width >= maxWidth / 2 )
+            imageInfo.width >>= 1;
+        imageInfo.rowPitch = imageInfo.width * pixelSize;
+
+        gRoundingStartValue = 0;
+        do
+        {
+            // uint64_t is not necessarily unsigned long long, so cast explicitly to match %llu
+            if( gDebugTrace )
+                log_info( "   at size %d, starting round ramp at %llu for range %llu\n", (int)imageInfo.width,
+                         (unsigned long long)gRoundingStartValue, (unsigned long long)typeRange );
+            int retCode = test_read_image_1D( device, context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
+            if( retCode )
+                return retCode;
+
+            // Advance by the number of channel values one image of this width holds
+            gRoundingStartValue += imageInfo.width * pixelSize / get_format_type_size( imageInfo.format );
+
+        } while( gRoundingStartValue < typeRange );
+    }
+    else
+    {
+        for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
+        {
+            cl_ulong size;
+            // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
+            // image, the result array, plus offset arrays, will fit in the global ram space
+            do
+            {
+                imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, seed );
+
+                imageInfo.rowPitch = imageInfo.width * pixelSize;
+                if( gEnablePitch )
+                {
+                    size_t extraWidth = (int)random_log_in_range( 0, 64, seed );
+                    imageInfo.rowPitch += extraWidth * pixelSize;
+                }
+
+                size = (size_t)imageInfo.rowPitch * 4;
+            } while(  size > maxAllocSize || ( size * 3 ) > memSize );
+
+            if( gDebugTrace )
+                log_info( "   at size %d (row pitch %d) out of %d\n", (int)imageInfo.width, (int)imageInfo.rowPitch, (int)maxWidth );
+            int retCode = test_read_image_1D( device, context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
+            if( retCode )
+                return retCode;
+        }
+    }
+
+    return 0;
+}
--- a/test_conformance/images/kernel_read_write/test_read_1D_array.cpp
+++ b/test_conformance/images/kernel_read_write/test_read_1D_array.cpp
@@ -0,0 +1,982 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../testBase.h"
+#include <float.h>
+
+#if defined( __APPLE__ )
+#include <signal.h>
+#include <sys/signal.h>
+#include <setjmp.h>
+#endif
+
+// Error thresholds used when comparing kernel results with reference values.
+// NOTE(review): their use sites lie further down this file; presumably MAX_ERR bounds the
+// absolute per-channel error and MAX_HALF_LINEAR_ERR is a looser bound for half-float
+// images sampled with linear filtering -- confirm at the use sites.
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+
+// Shared test state; these objects are defined in another translation unit.
+extern cl_command_queue queue;
+extern cl_context context;
+extern bool            gDebugTrace, gExtraValidateInfo, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestRounding;
+extern cl_device_type    gDeviceType;
+extern bool            gUseKernelSamplers;
+extern cl_filter_mode    gFilterModeToUse;
+extern cl_addressing_mode    gAddressModeToUse;
+extern uint64_t gRoundingStartValue;
+extern cl_mem_flags gMemFlagsToUse;
+
+// Failure budgets. determine_validation_error_1D_arr decrements its numTries / numClamped
+// counters and reports failure when one reaches zero.
+// NOTE(review): presumably the counters start at these values -- confirm at the call site.
+#define MAX_TRIES               1
+#define MAX_CLAMPED             1
+
+// printf-style template for the OpenCL C test kernel. Its five %s slots are, in order:
+//   1. text placed after the image parameter (samplerKernelArg fits here),
+//   2. the element type name of the 4-component results vector,
+//   3. text placed at the top of the kernel body,
+//   4. the statement declaring 'coords' (one of the two snippets below),
+//   5. the suffix appended to read_image.
+// NOTE(review): the call that fills this template is not in this part of the file --
+// confirm the argument order there.
+const char *read1DArrayKernelSourcePattern = 
+"__kernel void sample_kernel( read_only image1d_array_t input,%s __global float *xOffsets, __global float *yOffsets, __global %s4 *results )\n"
+"{\n"
+"%s"
+"   int tidX = get_global_id(0), tidY = get_global_id(1);\n"
+"   int offset = tidY*get_image_width(input) + tidX;\n"
+"%s"
+"   results[offset] = read_image%s( input, imageSampler, coords );\n"
+"}";
+
+// Kernel snippet: builds integer coordinates from the float offset buffers.
+const char *intCoordKernelSource1DArray = 
+"   int2 coords = (int2)( xOffsets[offset], yOffsets[offset]);\n";
+
+// Kernel snippet: builds float coordinates from the float offset buffers.
+const char *floatKernelSource1DArray = 
+"   float2 coords = (float2)( (float)( xOffsets[offset] ), (float)( yOffsets[offset] ) );\n";
+
+// Kernel parameter text that declares the sampler argument named imageSampler.
+static const char *samplerKernelArg = " sampler_t imageSampler,";
+
+// Absolute difference of two values after converting each to float.
+// The macro arguments are not parenthesized, so pass simple expressions only.
+#define ABS_ERROR( result, expected ) ( fabsf( (float)expected - (float)result ) )
+
+// Defined elsewhere; used below to fetch the pixel at (x, y, z) as four floats into outData.
+extern void read_image_pixel_float( void *imageData, image_descriptor *imageInfo, 
+                                   int x, int y, int z, float *outData );
+
+// Diagnoses and logs a sample whose result did not match the reference value for a
+// 1D image array read.
+//
+//   imagePtr, imageInfo  - source image data and its descriptor
+//   imageSampler         - sampler that was used for the read
+//   resultPtr, expected  - 4-channel value that was read back / the reference value
+//   error                - error measured by the caller (only printed when printAsFloat is set)
+//   x, y                 - sample coordinate; y is checked against imageInfo->arraySize
+//   xAddressOffset, yAddressOffset - offsets forwarded to get_integer_coords_offset
+//   j                    - sample index, used only in the log output
+//   numTries, numClamped - remaining failure budgets, updated in place
+//   printAsFloat         - print channels with %g (true) or as hex integers (false)
+//
+// Three cases are distinguished:
+//   1. the returned value sits in the image exactly at the clamped coordinate
+//      ("erroneously clamping coordinates"),
+//   2. the coordinate was clamped and the result is all zeroes
+//      ("erroneously returning border color"),
+//   3. anything else: a full report is logged and numTries is consumed.
+// Cases 1 and 2 consume numClamped and only produce output once that budget reaches zero.
+//
+// Returns 1 once a budget is used up (the caller should fail the test), 0 otherwise.
+template <class T> int determine_validation_error_1D_arr( void *imagePtr, image_descriptor *imageInfo, image_sampler_data *imageSampler,
+                                                  T *resultPtr, T * expected, float error,
+                                                  float x, float y, float xAddressOffset, float yAddressOffset, size_t j, int &numTries, int &numClamped, bool printAsFloat )
+{
+    int actualX, actualY;
+    // Non-zero when the returned value occurs in the image; a value above 1 is reported as "NOT unique" below
+    int found = debug_find_pixel_in_image( imagePtr, imageInfo, resultPtr, &actualX, &actualY, NULL );
+    bool clampingErr = false, clamped = false, otherClampingBug = false;
+    int clampedX, clampedY, ignoreMe;
+
+    // FIXME: I do not believe this is correct for 1D or 2D image arrays;
+    //        it will report spurious validation failure reasons since
+    //        the clamping for such image objects is different than 1D-3D
+    //        image objects.
+    clamped = get_integer_coords_offset( x, y, 0.0f, xAddressOffset, yAddressOffset, 0.0f, imageInfo->width, imageInfo->arraySize, 0, imageSampler, imageInfo, clampedX, clampedY, ignoreMe );
+
+    if( found )
+    {
+        // Is it a clamping bug?
+        if( clamped && clampedX == actualX && clampedY == actualY )
+        {
+            if( (--numClamped) == 0 )
+            {
+                // The size fields are cast to long so the arguments match the %ld conversions on every platform
+                log_error( "ERROR: TEST FAILED: Read is erroneously clamping coordinates for image size %ld x %ld!\n", (long)imageInfo->width, (long)imageInfo->arraySize );
+                if( printAsFloat )
+                {
+                    log_error( "Sample %d: coord {%f(%a), %f(%a)} did not validate!\n\tExpected (%g,%g,%g,%g),\n\tgot      (%g,%g,%g,%g),\n\terror of %g\n",
+                              (int)j, x, x, y, y, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ],
+                              (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error );
+                }
+                else
+                {
+                    log_error( "Sample %d: coord {%f(%a), %f(%a)} did not validate!\n\tExpected (%x,%x,%x,%x),\n\tgot      (%x,%x,%x,%x)\n",
+                              (int)j, x, x, y, y, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ],
+                              (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] );
+                }
+                return 1;
+            }
+            clampingErr = true;
+            otherClampingBug = true;
+        }
+    }
+    if( clamped && !otherClampingBug )
+    {
+        // If we are in clamp-to-edge mode and we're getting zeroes, it's possible we're getting border erroneously
+        if( resultPtr[ 0 ] == 0 && resultPtr[ 1 ] == 0 && resultPtr[ 2 ] == 0 && resultPtr[ 3 ] == 0 )
+        {
+            if( (--numClamped) == 0 )
+            {
+                log_error( "ERROR: TEST FAILED: Clamping is erroneously returning border color for image size %ld x %ld!\n", (long)imageInfo->width, (long)imageInfo->arraySize );
+                if( printAsFloat )
+                {
+                    log_error( "Sample %d: coord {%f(%a), %f(%a)} did not validate!\n\tExpected (%g,%g,%g,%g),\n\tgot      (%g,%g,%g,%g),\n\terror of %g\n",
+                              (int)j, x, x, y, y, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ],
+                              (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error );
+                }
+                else
+                {
+                    log_error( "Sample %d: coord {%f(%a), %f(%a)} did not validate!\n\tExpected (%x,%x,%x,%x),\n\tgot      (%x,%x,%x,%x)\n",
+                              (int)j, x, x, y, y, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ],
+                              (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] );
+                }
+                return 1;
+            }
+            clampingErr = true;
+        }
+    }
+    if( !clampingErr )
+    {
+        if( printAsFloat )
+        {
+            log_error( "Sample %d: coord {%f(%a), %f(%a)} did not validate!\n\tExpected (%g,%g,%g,%g),\n\tgot      (%g,%g,%g,%g), error of %g\n",
+                      (int)j, x, x, y, y, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ],
+                      (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error );
+        }
+        else
+        {
+            log_error( "Sample %d: coord {%f(%a), %f(%a)} did not validate!\n\tExpected (%x,%x,%x,%x),\n\tgot      (%x,%x,%x,%x)\n",
+                      (int)j, x, x, y, y, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ],
+                      (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] );
+        }
+        log_error( "img size %ld,%ld (pitch %ld)", (long)imageInfo->width, (long)imageInfo->arraySize, (long)imageInfo->rowPitch );
+        if( clamped )
+        {
+            log_error( " which would clamp to %d,%d\n", clampedX, clampedY );
+        }
+        if( printAsFloat && gExtraValidateInfo)
+        {
+            // Dump a 4x4 neighbourhood: columns clampedX-2 .. clampedX+1, rows clampedY-2 .. clampedY+1
+            log_error( "Nearby values:\n" );
+            log_error( "\t%d\t%d\t%d\t%d\n", clampedX - 2, clampedX - 1, clampedX, clampedX + 1 );
+            for( int yOff = -2; yOff <= 1; yOff++ )
+            {
+                float top[ 4 ], real[ 4 ], bot[ 4 ], bot2[ 4 ];
+                read_image_pixel_float( imagePtr, imageInfo, clampedX - 2 , clampedY + yOff, 0, top );
+                read_image_pixel_float( imagePtr, imageInfo, clampedX - 1 ,clampedY + yOff, 0, real );
+                read_image_pixel_float( imagePtr, imageInfo, clampedX, clampedY + yOff, 0, bot );
+                read_image_pixel_float( imagePtr, imageInfo, clampedX + 1, clampedY + yOff, 0, bot2 );
+                log_error( "%d\t(%g,%g,%g,%g)",clampedY + yOff, top[0], top[1], top[2], top[3] );
+                log_error( " (%g,%g,%g,%g)", real[0], real[1], real[2], real[3] );
+                log_error( " (%g,%g,%g,%g)",bot[0], bot[1], bot[2], bot[3] );
+                log_error( " (%g,%g,%g,%g)\n",bot2[0], bot2[1], bot2[2], bot2[3] );
+            }
+
+            if( clampedY < 1 )
+            {
+                // Second dump with the row offsets shifted by arraySize (rows near the other end of the array)
+                log_error( "Nearby values:\n" );
+                log_error( "\t%d\t%d\t%d\t%d\n", clampedX - 2, clampedX - 1, clampedX, clampedX + 1 );
+                for( int yOff = (int)imageInfo->arraySize - 2; yOff <= (int)imageInfo->arraySize + 1; yOff++ )
+                {
+                    float top[ 4 ], real[ 4 ], bot[ 4 ], bot2[ 4 ];
+                    read_image_pixel_float( imagePtr, imageInfo, clampedX - 2 , clampedY + yOff, 0, top );
+                    read_image_pixel_float( imagePtr, imageInfo, clampedX - 1 ,clampedY + yOff, 0, real );
+                    read_image_pixel_float( imagePtr, imageInfo, clampedX, clampedY + yOff, 0, bot );
+                    read_image_pixel_float( imagePtr, imageInfo, clampedX + 1, clampedY + yOff, 0, bot2 );
+                    log_error( "%d\t(%g,%g,%g,%g)",clampedY + yOff, top[0], top[1], top[2], top[3] );
+                    log_error( " (%g,%g,%g,%g)", real[0], real[1], real[2], real[3] );
+                    log_error( " (%g,%g,%g,%g)",bot[0], bot[1], bot[2], bot[3] );
+                    log_error( " (%g,%g,%g,%g)\n",bot2[0], bot2[1], bot2[2], bot2[3] );
+                }
+            }
+        }
+
+        if( imageSampler->filter_mode != CL_FILTER_LINEAR )
+        {
+            if( found )
+                log_error( "\tValue really found in image at %d,%d (%s)\n", actualX, actualY, ( found > 1 ) ? "NOT unique!!" : "unique" );
+            else
+                log_error( "\tValue not actually found in image\n" );
+        }
+        log_error( "\n" );
+
+        numClamped = -1; // We force the clamped counter to never work
+        if( ( --numTries ) == 0 )
+        {
+            return 1;
+        }
+    }
+    return 0;
+}
+
+// Limits _val to the range [_min, _max]. Each argument may be evaluated more than once,
+// so do not pass expressions with side effects.
+#define CLAMP( _val, _min, _max )           ((_val) < (_min) ? (_min) : (_val) > (_max) ? (_max) : (_val))
+
+// Fills xOffsets / yOffsets with one sample coordinate per element of a
+// width x arraySize grid, stored row by row.
+//
+// Each coordinate starts as the element's own (x, y) index plus xfract / yfract. Unless
+// gDisableOffsets is set, a value drawn from random_in_range( -10, 10, d ) is added to the
+// index first (x is drawn before y for every element). With CL_ADDRESS_NONE the coordinates
+// are then limited to [0, width - 1] and [0, arraySize - 1]. Finally, when normalized_coords
+// is non-zero, x is divided by width and y by arraySize.
+static void InitFloatCoords( image_descriptor *imageInfo, image_sampler_data *imageSampler, float *xOffsets, float *yOffsets, float xfract, float yfract, int normalized_coords, MTdata d )
+{
+    const size_t lineWidth = imageInfo->width;
+    const size_t lineCount = imageInfo->arraySize;
+    size_t filled = 0;
+
+    // Pass 1: generate the raw coordinates.
+    for( size_t row = 0; row < lineCount; row++ )
+    {
+        for( size_t col = 0; col < lineWidth; col++, filled++ )
+        {
+            if( gDisableOffsets )
+            {
+                xOffsets[ filled ] = (float) (xfract + (double) col);
+                yOffsets[ filled ] = (float) (yfract + (double) row);
+            }
+            else
+            {
+                xOffsets[ filled ] = (float) (xfract + (double) ((int) col + random_in_range( -10, 10, d )));
+                yOffsets[ filled ] = (float) (yfract + (double) ((int) row + random_in_range( -10, 10, d )));
+            }
+        }
+    }
+
+    // 'filled' now equals the number of coordinates written; the remaining passes
+    // treat every element alike, so they walk the arrays linearly.
+
+    // Pass 2: without an addressing mode, keep every coordinate inside the image.
+    if( imageSampler->addressing_mode == CL_ADDRESS_NONE )
+    {
+        const double lastX = (double)lineWidth - 1.0;
+        const double lastY = (double)lineCount - 1.0;
+        for( size_t k = 0; k < filled; k++ )
+        {
+            xOffsets[ k ] = (float) CLAMP( (double) xOffsets[ k ], 0.0, lastX);
+            yOffsets[ k ] = (float) CLAMP( (double) yOffsets[ k ], 0.0, lastY);
+        }
+    }
+
+    // Pass 3: rescale to normalized coordinates when requested.
+    if( normalized_coords )
+    {
+        const double xScale = (double) lineWidth;
+        const double yScale = (double) lineCount;
+        for( size_t k = 0; k < filled; k++ )
+        {
+            xOffsets[ k ] = (float) ((double) xOffsets[ k ] / xScale);
+            yOffsets[ k ] = (float) ((double) yOffsets[ k ] / yScale);
+        }
+    }
+}
+
+// Larger of two values; defined only if no MAX macro exists yet. Each argument may be
+// evaluated more than once, so do not pass expressions with side effects.
+#ifndef MAX
+#define MAX( _a, _b )           ((_a) > (_b) ? (_a) : (_b))
+#endif
+
+
+int test_read_image_1D_array( cl_device_id device, cl_context context, cl_command_queue queue, cl_kernel kernel,
+                             image_descriptor *imageInfo, image_sampler_data *imageSampler, 
+                             bool useFloatCoords, ExplicitType outputType, MTdata d )
+{
+    int error;
+    static int initHalf = 0;
+    
+    size_t threads[2];
+    
+    clMemWrapper xOffsets, yOffsets, results;
+    clSamplerWrapper actualSampler;
+    BufferOwningPtr<char> maxImageUseHostPtrBackingStore;
+    
+    // The DataBuffer template class really does use delete[], not free -- IRO
+    BufferOwningPtr<cl_float> xOffsetValues(malloc(sizeof(cl_float) * imageInfo->width * imageInfo->arraySize));
+    BufferOwningPtr<cl_float> yOffsetValues(malloc(sizeof(cl_float) * imageInfo->width * imageInfo->arraySize));
+    
+    if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
+        if( DetectFloatToHalfRoundingMode(queue) )
+            return 1;
+    
+    // generate_random_image_data allocates with malloc, so we use a MallocDataBuffer here
+    BufferOwningPtr<char> imageValues;
+    generate_random_image_data( imageInfo, imageValues, d );
+    
+    if( gDebugTrace )
+        log_info( " - Creating 1D image array %d by %d...\n", (int)imageInfo->width, (int)imageInfo->arraySize );
+    
+    // Construct testing sources
+    clProtectedImage protImage;
+    clMemWrapper unprotImage;
+    cl_mem image;
+    
+    if( gMemFlagsToUse == CL_MEM_USE_HOST_PTR )
+    {
+        // clProtectedImage uses USE_HOST_PTR, so just rely on that for the testing (via Ian)
+        // Do not use protected images for max image size test since it rounds the row size to a page size
+        if (gTestMaxImages) {
+            generate_random_image_data( imageInfo, maxImageUseHostPtrBackingStore, d );
+            
+            unprotImage = create_image_1d_array(context, 
+                                                CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, 
+                                                imageInfo->format, 
+                                                imageInfo->width, imageInfo->arraySize, 
+                                                ( gEnablePitch ? imageInfo->rowPitch : 0 ),
+                                                ( gEnablePitch ? imageInfo->slicePitch : 0),
+                                                maxImageUseHostPtrBackingStore, &error);
+        } else {
+            error = protImage.Create( context, CL_MEM_OBJECT_IMAGE1D_ARRAY, 
+                (cl_mem_flags)(CL_MEM_READ_ONLY), imageInfo->format, 
+                imageInfo->width, 1, 1, imageInfo->arraySize );
+        }
+        if( error != CL_SUCCESS )
+        {
+            log_error( "ERROR: Unable to create 1D image array of size %d x %d pitch %d (%s)\n", 
+                      (int)imageInfo->width, (int)imageInfo->arraySize, 
+                      (int)imageInfo->rowPitch, IGetErrorString( error ) );
+            return error;
+        }
+        
+        if (gTestMaxImages)
+            image = (cl_mem)unprotImage;
+        else
+            image = (cl_mem)protImage;
+    }
+    else if( gMemFlagsToUse == CL_MEM_COPY_HOST_PTR )
+    {
+        // Don't use clEnqueueWriteImage; just use copy host ptr to get the data in
+        unprotImage = create_image_1d_array(context, 
+                                            CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, 
+                                            imageInfo->format, 
+                                            imageInfo->width, imageInfo->arraySize, 
+                                            ( gEnablePitch ? imageInfo->rowPitch : 0 ),
+                                            ( gEnablePitch ? imageInfo->slicePitch : 0),
+                                            imageValues, &error);
+        
+        if( error != CL_SUCCESS )
+        {
+            log_error( "ERROR: Unable to create 1D image array of size %d x %d pitch %d (%s)\n", 
+                      (int)imageInfo->width, (int)imageInfo->arraySize, 
+                      (int)imageInfo->rowPitch, IGetErrorString( error ) );
+            return error;
+        }
+        image = unprotImage;
+    }
+    else // Either CL_MEM_ALLOC_HOST_PTR or none
+    {
+        // Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can be accessed by the host, but otherwise
+        // it works just as if no flag is specified, so we just do the same thing either way
+        unprotImage = create_image_1d_array(context, 
+                                            CL_MEM_READ_ONLY | gMemFlagsToUse, 
+                                            imageInfo->format, 
+                                            imageInfo->width, imageInfo->arraySize, 
+                                            ( gEnablePitch ? imageInfo->rowPitch : 0 ),
+                                            ( gEnablePitch ? imageInfo->slicePitch : 0),
+                                            imageValues, &error);
+        
+        if( error != CL_SUCCESS )
+        {
+            log_error( "ERROR: Unable to create 1D image array of size %d x %d pitch %d (%s)\n", 
+                      (int)imageInfo->width, (int)imageInfo->arraySize, 
+                      (int)imageInfo->rowPitch, IGetErrorString( error ) );
+            return error;
+        }
+        image = unprotImage;
+    }
+    
+    if( gMemFlagsToUse != CL_MEM_COPY_HOST_PTR )
+    {
+        if( gDebugTrace )
+            log_info( " - Writing image...\n" );
+        
+        size_t origin[ 3 ] = { 0, 0, 0 };
+        size_t region[ 3 ] = { imageInfo->width, imageInfo->arraySize, 1 };
+        
+        error = clEnqueueWriteImage(queue, image, CL_TRUE,
+                                    origin, region, ( gEnablePitch ? imageInfo->rowPitch : 0 ), 0,
+                                    imageValues, 0, NULL, NULL);
+        if (error != CL_SUCCESS) 
+        {
+            log_error( "ERROR: Unable to write to 1D image array of size %d x %d\n", 
+                      (int)imageInfo->width, (int)imageInfo->arraySize );
+            return error;
+        }
+    }
+    
+    if( gDebugTrace )
+        log_info( " - Creating kernel arguments...\n" );
+    
+    xOffsets = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), 
+        sizeof( cl_float ) * imageInfo->width * imageInfo->arraySize, xOffsetValues, &error );
+    test_error( error, "Unable to create x offset buffer" );
+    
+    yOffsets = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), 
+        sizeof( cl_float ) * imageInfo->width * imageInfo->arraySize, yOffsetValues, &error );
+    test_error( error, "Unable to create y offset buffer" );
+    
+    results = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  
+        get_explicit_type_size( outputType ) * 4 * imageInfo->width * imageInfo->arraySize, NULL, &error );
+    test_error( error, "Unable to create result buffer" );
+    
+    // Create sampler to use
+    actualSampler = clCreateSampler( context, (cl_bool)imageSampler->normalized_coords, 
+        imageSampler->addressing_mode, imageSampler->filter_mode, &error );
+    test_error( error, "Unable to create image sampler" );
+    
+    // Set arguments
+    int idx = 0;
+    error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &image );
+    test_error( error, "Unable to set kernel arguments" );
+    if( !gUseKernelSamplers )
+    {
+        error = clSetKernelArg( kernel, idx++, sizeof( cl_sampler ), &actualSampler );
+        test_error( error, "Unable to set kernel arguments" );
+    }
+    error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &xOffsets );
+    test_error( error, "Unable to set kernel arguments" );
+    error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &yOffsets );
+    test_error( error, "Unable to set kernel arguments" );
+    error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &results );
+    test_error( error, "Unable to set kernel arguments" );
+    
+    // A cast of troublesome offsets. The first one has to be zero.
+    const float float_offsets[] = { 0.0f, MAKE_HEX_FLOAT(0x1.0p-30f, 0x1L, -30), 0.25f, 0.3f, 0.5f - FLT_EPSILON/4.0f, 0.5f, 0.9f, 1.0f - FLT_EPSILON/2 };
+    int float_offset_count = sizeof( float_offsets) / sizeof( float_offsets[0] );
+    int numTries = MAX_TRIES, numClamped = MAX_CLAMPED;
+    int loopCount = 2 * float_offset_count;
+    if( ! useFloatCoords )
+        loopCount = 1;
+    if (gTestMaxImages) {
+        loopCount = 1;
+        log_info("Testing each size only once with pixel offsets of %g for max sized images.\n", float_offsets[0]);
+    }
+    
+    // Get the maximum absolute error for this format
+    double formatAbsoluteError = get_max_absolute_error(imageInfo->format, imageSampler); 
+    if (gDebugTrace) log_info("\tformatAbsoluteError is %e\n", formatAbsoluteError);
+    
+    if (0 == initHalf && imageInfo->format->image_channel_data_type == CL_HALF_FLOAT ) {
+        initHalf = CL_SUCCESS == DetectFloatToHalfRoundingMode( queue );
+        if (initHalf) {
+            log_info("Half rounding mode successfully detected.\n");
+        }
+    }
+    
+    for( int q = 0; q < loopCount; q++ )
+    {
+        float offset = float_offsets[ q % float_offset_count ];
+        
+        // Init the coordinates
+        InitFloatCoords(imageInfo, imageSampler, xOffsetValues, yOffsetValues, 
+                        q>=float_offset_count ? -offset: offset, 
+                        q>=float_offset_count ? offset: -offset, imageSampler->normalized_coords, d );
+        
+        error = clEnqueueWriteBuffer( queue, xOffsets, CL_TRUE, 0, sizeof(cl_float) * imageInfo->arraySize * imageInfo->width, xOffsetValues, 0, NULL, NULL );
+        test_error( error, "Unable to write x offsets" );
+        error = clEnqueueWriteBuffer( queue, yOffsets, CL_TRUE, 0, sizeof(cl_float) * imageInfo->arraySize * imageInfo->width, yOffsetValues, 0, NULL, NULL );
+        test_error( error, "Unable to write y offsets" );
+        
+        // Get results
+        size_t resultValuesSize = imageInfo->width * imageInfo->arraySize * get_explicit_type_size( outputType ) * 4;
+        BufferOwningPtr<char> resultValues(malloc(resultValuesSize));
+        memset( resultValues, 0xff, resultValuesSize );
+        clEnqueueWriteBuffer( queue, results, CL_TRUE, 0, resultValuesSize, resultValues, 0, NULL, NULL );
+        
+        // Run the kernel
+        threads[0] = (size_t)imageInfo->width;
+        threads[1] = (size_t)imageInfo->arraySize;
+        error = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL );
+        test_error( error, "Unable to run kernel" );
+        
+        if( gDebugTrace )
+            log_info( "    reading results, %ld kbytes\n", (unsigned long)( imageInfo->width * imageInfo->arraySize * get_explicit_type_size( outputType ) * 4 / 1024 ) );
+        
+        error = clEnqueueReadBuffer( queue, results, CL_TRUE, 0, imageInfo->width * imageInfo->arraySize * get_explicit_type_size( outputType ) * 4, resultValues, 0, NULL, NULL );
+        test_error( error, "Unable to read results from kernel" );
+        if( gDebugTrace )
+            log_info( "    results read\n" );
+        
+        // Validate results element by element
+        char *imagePtr = imageValues;
+        /*
+         * FLOAT output type
+         */
+        if( outputType == kFloat )
+        {
+            // Validate float results
+            float *resultPtr = (float *)(char *)resultValues;
+            float expected[4], error=0.0f;
+            float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 0 /*not 3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode );
+            for( size_t y = 0, j = 0; y < imageInfo->arraySize; y++ )
+            {
+                for( size_t x = 0; x < imageInfo->width; x++, j++ )
+                {
+                    // Step 1: go through and see if the results verify for the pixel
+                    // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
+                    // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
+                    int checkOnlyOnePixel = 0;
+                    int found_pixel = 0;
+                    float offset = NORM_OFFSET;
+                    if (!imageSampler->normalized_coords ||  imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0
+#if defined( __APPLE__ )
+                        // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes
+                        || gDeviceType != CL_DEVICE_TYPE_GPU 
+#endif
+                        ) 
+                        offset = 0.0f;          // Loop only once
+                    
+                    for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel; norm_offset_x += NORM_OFFSET) {
+                        for (float norm_offset_y = -offset; norm_offset_y <= offset && !found_pixel; norm_offset_y += NORM_OFFSET) {
+                            
+                            
+                            // Try sampling the pixel, without flushing denormals.
+                            int containsDenormals = 0;
+                            FloatPixel maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo, 
+                                xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
+                                imageSampler, expected, 0, &containsDenormals );
+                            
+                            float err1 = fabsf( resultPtr[0] - expected[0] );
+                            float err2 = fabsf( resultPtr[1] - expected[1] );
+                            float err3 = fabsf( resultPtr[2] - expected[2] );
+                            float err4 = fabsf( resultPtr[3] - expected[3] );
+                            // Treat any nonzero error below the format's maximum absolute error as zero
+                            if (err1 > 0 && err1 < formatAbsoluteError) { err1 = 0.0f; }
+                            if (err2 > 0 && err2 < formatAbsoluteError) { err2 = 0.0f; }
+                            if (err3 > 0 && err3 < formatAbsoluteError) { err3 = 0.0f; }
+                            if (err4 > 0 && err4 < formatAbsoluteError) { err4 = 0.0f; }
+                            float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN );
+                            float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN );
+                            float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN );
+                            float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN );
+                            
+                            // Check if the result matches.
+                            if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2)    ||
+                               ! (err3 <= maxErr3) || ! (err4 <= maxErr4)    )
+                            {
+                                //try flushing the denormals, if there is a failure.
+                                if( containsDenormals )
+                                {
+                                    // If the implementation decides to flush subnormals to zero,
+                                    // the max error needs to be adjusted.
+                                    maxErr1 += 4 * FLT_MIN;
+                                    maxErr2 += 4 * FLT_MIN;
+                                    maxErr3 += 4 * FLT_MIN;
+                                    maxErr4 += 4 * FLT_MIN;
+                                    
+                                    maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo, 
+                                                                               xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
+                                                                               imageSampler, expected, 0, NULL );
+                                    
+                                    err1 = fabsf( resultPtr[0] - expected[0] );
+                                    err2 = fabsf( resultPtr[1] - expected[1] );
+                                    err3 = fabsf( resultPtr[2] - expected[2] );
+                                    err4 = fabsf( resultPtr[3] - expected[3] );                    
+                                }
+                            }
+                            
+                            // If the final result DOES match, then we've found a valid result and we're done with this pixel.
+                            found_pixel = (err1 <= maxErr1) && (err2 <= maxErr2)  && (err3 <= maxErr3) && (err4 <= maxErr4);                             
+                        }//norm_offset_x
+                    }//norm_offset_y
+                    
+                    
+                    // Step 2: If we did not find a match, then print out debugging info.
+                    if (!found_pixel) {
+                        // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
+                        // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
+                        checkOnlyOnePixel = 0;
+                        int shouldReturn = 0;
+                        for (float norm_offset_x = -offset; norm_offset_x <= offset && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
+                            for (float norm_offset_y = -offset; norm_offset_y <= offset && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
+                                
+                                // If we are not on a GPU, we are not normalized, or NORM_OFFSET is zero, then only
+                                // test with offsets (0.0, 0.0), i.e., test a single pixel.
+                                if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
+                                    norm_offset_x = 0.0f;
+                                    norm_offset_y = 0.0f;
+                                    checkOnlyOnePixel = 1;
+                                }
+                                
+                                int containsDenormals = 0;
+                                FloatPixel maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo, 
+                                                                                      xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
+                                                                                      imageSampler, expected, 0, &containsDenormals );
+                                
+                                float err1 = fabsf( resultPtr[0] - expected[0] );
+                                float err2 = fabsf( resultPtr[1] - expected[1] );
+                                float err3 = fabsf( resultPtr[2] - expected[2] );
+                                float err4 = fabsf( resultPtr[3] - expected[3] );
+                                float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN );
+                                float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN );
+                                float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN );
+                                float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN );
+                                
+                                
+                                if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2)    ||
+                                   ! (err3 <= maxErr3) || ! (err4 <= maxErr4)    )
+                                {
+                                    //try flushing the denormals, if there is a failure.
+                                    if( containsDenormals )
+                                    {
+                                        maxErr1 += 4 * FLT_MIN;
+                                        maxErr2 += 4 * FLT_MIN;
+                                        maxErr3 += 4 * FLT_MIN;
+                                        maxErr4 += 4 * FLT_MIN;                                    
+                                        
+                                        maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo, 
+                                                                                   xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
+                                                                                   imageSampler, expected, 0, NULL );
+                                        
+                                        err1 = fabsf( resultPtr[0] - expected[0] );
+                                        err2 = fabsf( resultPtr[1] - expected[1] );
+                                        err3 = fabsf( resultPtr[2] - expected[2] );
+                                        err4 = fabsf( resultPtr[3] - expected[3] );                    
+                                    }
+                                }
+                                if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2)    ||
+                                   ! (err3 <= maxErr3) || ! (err4 <= maxErr4)    )
+                                {                       
+                                    log_error("FAILED norm_offsets: %g , %g:\n", norm_offset_x, norm_offset_y);
+                                    
+                                    float tempOut[4];
+                                    shouldReturn |= determine_validation_error_1D_arr<float>( imagePtr, imageInfo, imageSampler, resultPtr,
+                                                                                      expected, error, xOffsetValues[ j ], yOffsetValues[ j ], norm_offset_x, norm_offset_y, j, numTries, numClamped, true );
+                                    
+                                    log_error( "Step by step:\n" );
+                                    FloatPixel temp = sample_image_pixel_float_offset( imageValues, imageInfo, 
+                                                                                      xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
+                                                                                      imageSampler, tempOut, 1 /* verbose */, &containsDenormals /*dont flush while error reporting*/ );
+                                    log_error( "\tulps: %2.2f, %2.2f, %2.2f, %2.2f  (max allowed: %2.2f)\n\n",    
+                                              Ulp_Error( resultPtr[0], expected[0] ),
+                                              Ulp_Error( resultPtr[1], expected[1] ),
+                                              Ulp_Error( resultPtr[2], expected[2] ),
+                                              Ulp_Error( resultPtr[3], expected[3] ),
+                                              Ulp_Error( MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) + maxErr, MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) ) );
+                                    
+                                } else {
+                                    log_error("Test error: we should have detected this passing above.\n");
+                                }
+                                
+                            }//norm_offset_x
+                        }//norm_offset_y
+                        if( shouldReturn )
+                            return 1;
+                    } // if (!found_pixel)
+                    
+                    resultPtr += 4;
+                }
+            }
+        }
+        /*
+         * UINT output type
+         */            
+        else if( outputType == kUInt )
+        {
+            // Validate unsigned integer results
+            unsigned int *resultPtr = (unsigned int *)(char *)resultValues;
+            unsigned int expected[4];
+            float error;
+            for( size_t y = 0, j = 0; y < imageInfo->arraySize; y++ )
+            {
+                for( size_t x = 0; x < imageInfo->width; x++, j++ )
+                {
+                    // Step 1: go through and see if the results verify for the pixel
+                    // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
+                    // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
+                    int checkOnlyOnePixel = 0;
+                    int found_pixel = 0;
+                    for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
+                        for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
+                            
+                            // If we are not on a GPU, we are not normalized, or NORM_OFFSET is zero, then only
+                            // test with offsets (0.0, 0.0), i.e., test a single pixel.
+                            if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
+                                norm_offset_x = 0.0f;
+                                norm_offset_y = 0.0f;
+                                checkOnlyOnePixel = 1;
+                            }
+                            
+                            sample_image_pixel_offset<unsigned int>( imageValues, imageInfo, 
+                                                                    xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, 
+                                                                    imageSampler, expected );
+                            
+                            
+                            error = errMax( errMax( abs_diff_uint(expected[ 0 ], resultPtr[ 0 ]), abs_diff_uint(expected[ 1 ], resultPtr[ 1 ]) ),
+                                           errMax( abs_diff_uint(expected[ 2 ], resultPtr[ 2 ]), abs_diff_uint(expected[ 3 ], resultPtr[ 3 ]) ) );
+                            
+                            if (error <= MAX_ERR) 
+                                found_pixel = 1;
+                        }//norm_offset_x
+                    }//norm_offset_y
+                    
+                    // Step 2: If we did not find a match, then print out debugging info.
+                    if (!found_pixel) {
+                        // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
+                        // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
+                        checkOnlyOnePixel = 0;
+                        int shouldReturn = 0;
+                        for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
+                            for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
+                                
+                                // If we are not on a GPU, we are not normalized, or NORM_OFFSET is zero, then only
+                                // test with offsets (0.0, 0.0), i.e., test a single pixel.
+                                if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
+                                    norm_offset_x = 0.0f;
+                                    norm_offset_y = 0.0f;
+                                    checkOnlyOnePixel = 1;
+                                }
+                                
+                                sample_image_pixel_offset<unsigned int>( imageValues, imageInfo, 
+                                                                        xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, 
+                                                                        imageSampler, expected );
+                                
+                                
+                                error = errMax( errMax( abs_diff_uint(expected[ 0 ], resultPtr[ 0 ]), abs_diff_uint(expected[ 1 ], resultPtr[ 1 ]) ),
+                                               errMax( abs_diff_uint(expected[ 2 ], resultPtr[ 2 ]), abs_diff_uint(expected[ 3 ], resultPtr[ 3 ]) ) );
+                                
+                                if( error > MAX_ERR )
+                                {
+                                    log_error("FAILED norm_offsets: %g , %g:\n", norm_offset_x, norm_offset_y);
+                                    
+                                    shouldReturn |= determine_validation_error_1D_arr<unsigned int>( imagePtr, imageInfo, imageSampler, resultPtr,
+                                                                                             expected, error, xOffsetValues[j], yOffsetValues[j], norm_offset_x, norm_offset_y, j, numTries, numClamped, false );
+                                } else {
+                                    log_error("Test error: we should have detected this passing above.\n");
+                                }
+                            }//norm_offset_x
+                        }//norm_offset_y
+                        if( shouldReturn )
+                            return 1;
+                    } // if (!found_pixel)
+                    
+                    resultPtr += 4;
+                }
+            }
+        }
+        /*
+         * INT output type
+         */                        
+        else
+        {
+            // Validate integer results
+            int *resultPtr = (int *)(char *)resultValues;
+            int expected[4];
+            float error;
+            for( size_t y = 0, j = 0; y < imageInfo->arraySize; y++ )
+            {
+                for( size_t x = 0; x < imageInfo->width; x++, j++ )
+                {
+                    // Step 1: go through and see if the results verify for the pixel
+                    // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
+                    // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
+                    int checkOnlyOnePixel = 0;
+                    int found_pixel = 0;
+                    for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
+                        for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
+                            
+                            // If we are not on a GPU, we are not normalized, or NORM_OFFSET is zero, then only
+                            // test with offsets (0.0, 0.0), i.e., test a single pixel.
+                            if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
+                                norm_offset_x = 0.0f;
+                                norm_offset_y = 0.0f;
+                                checkOnlyOnePixel = 1;
+                            }
+                            
+                            sample_image_pixel_offset<int>( imageValues, imageInfo, 
+                                                           xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, 
+                                                           imageSampler, expected );
+                            
+                            
+                            error = errMax( errMax( abs_diff_int(expected[ 0 ], resultPtr[ 0 ]), abs_diff_int(expected[ 1 ], resultPtr[ 1 ]) ),
+                                           errMax( abs_diff_int(expected[ 2 ], resultPtr[ 2 ]), abs_diff_int(expected[ 3 ], resultPtr[ 3 ]) ) );
+                            
+                            if (error <= MAX_ERR) 
+                                found_pixel = 1;
+                        }//norm_offset_x
+                    }//norm_offset_y
+                    
+                    // Step 2: If we did not find a match, then print out debugging info.
+                    if (!found_pixel) {
+                        // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
+                        // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
+                        checkOnlyOnePixel = 0;
+                        int shouldReturn = 0;
+                        for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
+                            for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
+                                
+                                // If we are not on a GPU, we are not normalized, or NORM_OFFSET is zero, then only
+                                // test with offsets (0.0, 0.0), i.e., test a single pixel.
+                                if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
+                                    norm_offset_x = 0.0f;
+                                    norm_offset_y = 0.0f;
+                                    checkOnlyOnePixel = 1;
+                                }
+                                
+                                sample_image_pixel_offset<int>( imageValues, imageInfo, 
+                                                               xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, 
+                                                               imageSampler, expected );
+                                
+                                
+                                error = errMax( errMax( abs_diff_int(expected[ 0 ], resultPtr[ 0 ]), abs_diff_int(expected[ 1 ], resultPtr[ 1 ]) ),
+                                               errMax( abs_diff_int(expected[ 2 ], resultPtr[ 2 ]), abs_diff_int(expected[ 3 ], resultPtr[ 3 ]) ) );
+                                
+                                if( error > MAX_ERR )
+                                {
+                                    log_error("FAILED norm_offsets: %g , %g:\n", norm_offset_x, norm_offset_y);
+                                    
+                                    shouldReturn |= determine_validation_error_1D_arr<int>( imagePtr, imageInfo, imageSampler, resultPtr,
+                                                                                    expected, error, xOffsetValues[j], yOffsetValues[j], norm_offset_x, norm_offset_y, j, numTries, numClamped, false );
+                                } else {
+                                    log_error("Test error: we should have detected this passing above.\n");
+                                }
+                            }//norm_offset_x
+                        }//norm_offset_y
+                        if( shouldReturn )
+                            return 1;
+                    } // if (!found_pixel)
+                    
+                    resultPtr += 4;
+                }
+            }
+        }    
+    }
+    
+    return numTries != MAX_TRIES || numClamped != MAX_CLAMPED;
+}
+
+// Runs the 1D image array read test over a family of image sizes.
+//
+// The kernel is generated from read1DArrayKernelSourcePattern for the requested
+// output type (int / uint / float reads) and coordinate kind, and then
+// test_read_image_1D_array is invoked once per candidate size. Which sizes are
+// tried depends on the global mode flags, checked in this order:
+//   gTestSmallImages : widths 1..12 combined with array sizes 2..8
+//   gTestMaxImages   : the sizes reported by get_max_sizes
+//   gTestRounding    : one size derived from the channel type range, repeated
+//                      while gRoundingStartValue walks through that range
+//   otherwise        : NUM_IMAGE_ITERATIONS random sizes that fit in memory
+//
+// Returns 0 if every size passed, otherwise the first non-zero result of
+// test_read_image_1D_array. Setup failures are reported through test_error.
+int test_read_image_set_1D_array( cl_device_id device, cl_image_format *format, image_sampler_data *imageSampler, 
+                                 bool floatCoords, ExplicitType outputType )
+{
+    char programSrc[10240];
+    const char *ptr;
+    const char *readFormat;
+    clProgramWrapper program;
+    clKernelWrapper kernel;
+    RandomSeed seed( gRandomSeed );
+    int error;
+    
+    // Get our operating params
+    size_t maxWidth, maxArraySize;
+    cl_ulong maxAllocSize, memSize;
+    image_descriptor imageInfo = { 0x0 };
+    size_t pixelSize;
+    
+    imageInfo.format = format;
+    imageInfo.depth = imageInfo.height = 0;
+    imageInfo.type = CL_MEM_OBJECT_IMAGE1D_ARRAY;
+    pixelSize = get_pixel_size( imageInfo.format );
+    
+    error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL );
+    error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, sizeof( maxArraySize ), &maxArraySize, NULL );
+    error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL );
+    error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
+    // This is the 1D array test; the message used to say "2D array".
+    test_error( error, "Unable to get max image 1D array size from device" );
+    
+    // Determine types: the suffix selects read_imagei / read_imageui / read_imagef
+    if( outputType == kInt )
+        readFormat = "i";
+    else if( outputType == kUInt )
+        readFormat = "ui";
+    else // kFloat
+        readFormat = "f";
+    
+    // Construct the source. With kernel samplers the sampler is declared inside
+    // the kernel body (samplerVar) instead of being passed as an argument.
+    const char *samplerArg = samplerKernelArg;
+    char samplerVar[ 1024 ] = "";
+    if( gUseKernelSamplers )
+    {
+        get_sampler_kernel_code( imageSampler, samplerVar );
+        samplerArg = "";
+    }
+    
+    sprintf( programSrc, read1DArrayKernelSourcePattern, samplerArg, get_explicit_type_name( outputType ),
+            samplerVar,
+            floatCoords ? floatKernelSource1DArray : intCoordKernelSource1DArray,
+            readFormat );
+    
+    ptr = programSrc;
+    error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "sample_kernel" );
+    test_error( error, "Unable to create testing kernel" );
+    
+    if( gTestSmallImages )
+    {
+        for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
+        {
+            imageInfo.rowPitch = imageInfo.slicePitch = imageInfo.width * pixelSize;
+            for( imageInfo.arraySize = 2; imageInfo.arraySize < 9; imageInfo.arraySize++ )
+            {
+                if( gDebugTrace )
+                    log_info( "   at size %d,%d\n", (int)imageInfo.width, (int)imageInfo.arraySize );
+                
+                int retCode = test_read_image_1D_array( device, context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
+                if( retCode )
+                    return retCode;
+            }
+        }
+    }
+    else if( gTestMaxImages )
+    {
+        // Try a specific set of maximum sizes
+        size_t numbeOfSizes;
+        size_t sizes[100][3];
+        
+        get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, 1, 1, maxArraySize, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE1D_ARRAY, imageInfo.format);
+        
+        for( size_t idx = 0; idx < numbeOfSizes; idx++ )
+        {
+            imageInfo.width = sizes[ idx ][ 0 ];
+            imageInfo.arraySize = sizes[ idx ][ 2 ]; // 3rd dimension in get_max_sizes
+            imageInfo.rowPitch = imageInfo.slicePitch = imageInfo.width * pixelSize;
+            // Report the array size from the same slot it was read from ([2]);
+            // slot [1] is the unused height and was being logged by mistake.
+            log_info("Testing %d x %d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 2 ]);
+            if( gDebugTrace )
+                log_info( "   at max size %d,%d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 2 ] );
+            int retCode = test_read_image_1D_array( device, context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
+            if( retCode )
+                return retCode;
+        }
+    }
+    else if( gTestRounding )
+    {
+        // Pick an image with roughly one channel value per representable value
+        // of the channel type, then shrink it until it fits the device limits.
+        uint64_t typeRange = 1LL << ( get_format_type_size( imageInfo.format ) * 8 );
+        typeRange /= pixelSize / get_format_type_size( imageInfo.format );
+        imageInfo.arraySize = (size_t)( ( typeRange + 255LL ) / 256LL );
+        imageInfo.width = (size_t)( typeRange / (cl_ulong)imageInfo.arraySize );
+        while( imageInfo.arraySize >= maxArraySize / 2 )
+        {
+            imageInfo.width <<= 1;
+            imageInfo.arraySize >>= 1;
+        }
+        
+        while( imageInfo.width >= maxWidth / 2 )
+            imageInfo.width >>= 1;
+        imageInfo.rowPitch = imageInfo.slicePitch = imageInfo.width * pixelSize;
+        
+        gRoundingStartValue = 0;
+        do
+        {
+            if( gDebugTrace )
+                log_info( "   at size %d,%d, starting round ramp at %llu for range %llu\n", (int)imageInfo.width, (int)imageInfo.arraySize,
+                         (unsigned long long)gRoundingStartValue, (unsigned long long)typeRange );
+            int retCode = test_read_image_1D_array( device, context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
+            if( retCode )
+                return retCode;
+            
+            // Advance the ramp by the number of channel values one image holds
+            gRoundingStartValue += imageInfo.width * imageInfo.arraySize * pixelSize / get_format_type_size( imageInfo.format );
+            
+        } while( gRoundingStartValue < typeRange );
+    }
+    else
+    {
+        for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
+        {
+            cl_ulong size;
+            // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
+            // image, the result array, plus offset arrays, will fit in the global ram space
+            do
+            {
+                imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, seed );
+                imageInfo.arraySize = (size_t)random_log_in_range( 16, (int)maxArraySize / 32, seed );
+                
+                imageInfo.rowPitch = imageInfo.width * pixelSize;
+                if( gEnablePitch )
+                {
+                    size_t extraWidth = (int)random_log_in_range( 0, 64, seed );
+                    imageInfo.rowPitch += extraWidth * pixelSize;
+                }
+                imageInfo.slicePitch = imageInfo.rowPitch;
+                
+                // Compute the footprint in 64 bits so it cannot wrap where size_t is 32 bits wide
+                size = (cl_ulong)imageInfo.rowPitch * (cl_ulong)imageInfo.arraySize * 4;
+            } while(  size > maxAllocSize || ( size * 3 ) > memSize );
+            
+            if( gDebugTrace )
+                log_info( "   at size %d,%d (row pitch %d) out of %d,%d\n", (int)imageInfo.width, (int)imageInfo.arraySize, (int)imageInfo.rowPitch, (int)maxWidth, (int)maxArraySize );
+            int retCode = test_read_image_1D_array( device, context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
+            if( retCode )
+                return retCode;
+        }
+    }
+    
+    return 0;
+}
--- a/test_conformance/images/kernel_read_write/test_read_2D_array.cpp
+++ b/test_conformance/images/kernel_read_write/test_read_2D_array.cpp
@@ -0,0 +1,959 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../testBase.h"
+#include <float.h>
+
+#define MAX_ERR 0.005f   // NOTE(review): presumably an error tolerance; no use is visible in this part of the file -- confirm
+#define MAX_HALF_LINEAR_ERR 0.3f   // NOTE(review): presumably a tolerance for half-float linear filtering -- confirm against its users
+
+extern cl_command_queue queue;   // this and the following extern objects are defined in another translation unit
+extern cl_context context;
+extern bool         gDebugTrace, gExtraValidateInfo, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestRounding;
+extern cl_device_type   gDeviceType;
+extern bool         gUseKernelSamplers;   // when set, the sampler is not passed to the kernel as an argument
+extern cl_filter_mode   gFilterModeToUse;
+extern cl_addressing_mode   gAddressModeToUse;
+extern cl_mem_flags gMemFlagsToUse;
+
+#define MAX_TRIES               1   // initial value of the numTries counter that the validation helper counts down
+#define MAX_CLAMPED             1   // initial value of the numClamped counter that the validation helper counts down
+
+
+const char *read2DArrayKernelSourcePattern =   // printf-style template of the test kernel; its five %s slots are annotated below
+"__kernel void sample_kernel( read_only image2d_array_t input,%s __global float *xOffsets, __global float *yOffsets, __global float *zOffsets,  __global %s4 *results )\n"   // slot 1: optional sampler parameter (see samplerKernelArg); slot 2: element type of the 4-wide result vector
+"{\n"
+"%s"   // slot 3: presumably the in-kernel sampler declaration when kernel samplers are used -- TODO confirm at the formatting call
+"   int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = get_global_id(2);\n"
+"   int offset = tidZ*get_image_width(input)*get_image_height(input) + tidY*get_image_width(input) + tidX;\n"   // linear index of this work-item: z*width*height + y*width + x
+"%s"   // slot 4: must declare `coords`, e.g. one of the coordinate snippets defined below
+"   results[offset] = read_image%s( input, imageSampler, coords );\n"   // slot 5: suffix selecting the read_image variant
+"}";
+
+const char *int2DArrayCoordKernelSource =   // coordinate snippet: truncates the three float offsets to an int4
+"   int4 coords = (int4)( (int) xOffsets[offset], (int) yOffsets[offset], (int) zOffsets[offset], 0 );\n";
+
+const char *float2DArrayUnnormalizedCoordKernelSource =   // coordinate snippet: passes the three float offsets through as a float4
+"   float4 coords = (float4)( xOffsets[offset], yOffsets[offset], zOffsets[offset], 0.0f );\n";
+
+
+static const char *samplerKernelArg = " sampler_t imageSampler,";   // kernel parameter text used when the sampler is passed as an argument
+
+#define ABS_ERROR( result, expected ) ( fabsf( (float)expected - (float)result ) )   // |expected - result| as float; arguments are not parenthesised, so pass simple expressions only
+
+extern void read_image_pixel_float( void *imageData, image_descriptor *imageInfo, int x, int y, int z, float *outData );   // defined elsewhere; used below to print the texels around a failing sample
+// Logs the coordinate, the expected texel and the texel actually returned for a
+// sample that failed validation. printAsFloat chooses %g versus %x formatting of
+// the channel values; multiLine chooses the layout that puts "Expected" and
+// "got" on separate lines.
+template <class T> static void log_unvalidated_sample_2D_array( size_t j, float x, float y, float z,
+                                                                T *expected, T *resultPtr, float error,
+                                                                bool printAsFloat, bool multiLine )
+{
+    // j is a size_t but the messages use %ld; convert explicitly so the argument
+    // matches the conversion even on targets where long and size_t differ in width.
+    long sample = (long)j;
+    
+    if( printAsFloat )
+    {
+        if( multiLine )
+        {
+            log_error( "Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not validate!\n\tExpected (%g,%g,%g,%g),\n\t     got (%g,%g,%g,%g), error of %g\n",
+                      sample, x, x, y, y, z, z, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ],
+                      (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error );
+        }
+        else
+        {
+            log_error( "Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not validate! Expected (%g,%g,%g,%g), got (%g,%g,%g,%g), error of %g\n",
+                      sample, x, x, y, y, z, z, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ],
+                      (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error );
+        }
+    }
+    else
+    {
+        if( multiLine )
+        {
+            log_error( "Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not validate!\n\tExpected (%x,%x,%x,%x),\n\t     got (%x,%x,%x,%x)\n",
+                      sample, x, x, y, y, z, z, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ],
+                      (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] );
+        }
+        else
+        {
+            log_error( "Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not validate! Expected (%x,%x,%x,%x), got (%x,%x,%x,%x)\n",
+                      sample, x, x, y, y, z, z, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ],
+                      (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] );
+        }
+    }
+}
+
+// Classifies and reports a sample of a 2D image array read that did not match
+// the expected value.
+//
+//   imagePtr/imageInfo  host copy and description of the source image
+//   resultPtr/expected  the 4 channels returned by the device / computed on the host
+//   x, y, z             coordinate passed to the kernel for sample j
+//   *AddressOffset      offsets forwarded to get_integer_coords_offset
+//   numTries/numClamped in/out failure counters owned by the caller
+//   printAsFloat        print channels with %g instead of %x
+//
+// Two clamping failure modes are recognised first: the returned value is the
+// texel at the clamped coordinate, or the coordinate was clamped and the result
+// is all zero. Each such hit decrements numClamped. Any other mismatch is logged
+// in detail, sets numClamped to -1 and decrements numTries.
+// Returns -1 when the decremented counter reaches zero, otherwise 0.
+template <class T> int determine_validation_error_offset_2D_array( void *imagePtr, image_descriptor *imageInfo, image_sampler_data *imageSampler,
+                                                         T *resultPtr, T * expected, float error,
+                                                         float x, float y, float z, float xAddressOffset, float yAddressOffset, float zAddressOffset, size_t j, int &numTries, int &numClamped, bool printAsFloat )
+{
+    int actualX, actualY, actualZ;
+    int found = debug_find_pixel_in_image( imagePtr, imageInfo, resultPtr, &actualX, &actualY, &actualZ );
+    bool clampingErr = false, otherClampingBug = false;
+    int clampedX, clampedY, clampedZ;
+    
+    // The array size plays the role of the third image dimension here
+    size_t imageWidth = imageInfo->width, imageHeight = imageInfo->height, imageDepth = imageInfo->arraySize;
+    
+    bool clamped = get_integer_coords_offset( x, y, z, xAddressOffset, yAddressOffset, zAddressOffset, imageWidth, imageHeight, imageDepth, imageSampler, imageInfo, clampedX, clampedY, clampedZ );
+    
+    // Is it a clamping bug? The value we got back lives exactly at the clamped coordinate.
+    if( found && clamped && clampedX == actualX && clampedY == actualY && clampedZ == actualZ )
+    {
+        if( (--numClamped) == 0 )
+        {
+            log_unvalidated_sample_2D_array( j, x, y, z, expected, resultPtr, error, printAsFloat, false );
+            log_error( "ERROR: TEST FAILED: Read is erroneously clamping coordinates!\n" );
+            return -1;
+        }
+        clampingErr = true;
+        otherClampingBug = true;
+    }
+    
+    if( clamped && !otherClampingBug )
+    {
+        // If we are in clamp-to-edge mode and we're getting zeroes, it's possible we're getting border erroneously
+        if( resultPtr[ 0 ] == 0 && resultPtr[ 1 ] == 0 && resultPtr[ 2 ] == 0 && resultPtr[ 3 ] == 0 )
+        {
+            if( (--numClamped) == 0 )
+            {
+                log_unvalidated_sample_2D_array( j, x, y, z, expected, resultPtr, error, printAsFloat, false );
+                log_error( "ERROR: TEST FAILED: Clamping is erroneously returning border color!\n" );
+                return -1;
+            }
+            clampingErr = true;
+        }
+    }
+    
+    if( !clampingErr )
+    {
+        // Not a recognised clamping problem: dump everything we know about the sample
+        log_unvalidated_sample_2D_array( j, x, y, z, expected, resultPtr, error, printAsFloat, true );
+        log_error( "Integer coords resolve to %d,%d,%d   with img size %d,%d,%d\n", clampedX, clampedY, clampedZ, (int)imageWidth, (int)imageHeight, (int)imageDepth );
+        
+        if( printAsFloat && gExtraValidateInfo )
+        {
+            // Print the 3x3x3 neighbourhood around the resolved texel, one row of three texels per line
+            log_error( "\nNearby values:\n" );
+            for( int zOff = -1; zOff <= 1; zOff++ )
+            {
+                for( int yOff = -1; yOff <= 1; yOff++ )
+                {
+                    float top[ 4 ], real[ 4 ], bot[ 4 ];
+                    read_image_pixel_float( imagePtr, imageInfo, clampedX - 1 , clampedY + yOff, clampedZ + zOff, top );
+                    read_image_pixel_float( imagePtr, imageInfo, clampedX ,clampedY + yOff, clampedZ + zOff, real );
+                    read_image_pixel_float( imagePtr, imageInfo, clampedX + 1, clampedY + yOff, clampedZ + zOff, bot );
+                    log_error( "\t(%g,%g,%g,%g)",top[0], top[1], top[2], top[3] );
+                    log_error( " (%g,%g,%g,%g)", real[0], real[1], real[2], real[3] );
+                    log_error( " (%g,%g,%g,%g)\n",bot[0], bot[1], bot[2], bot[3] );
+                }
+            }
+        }
+        
+        if( imageSampler->filter_mode != CL_FILTER_LINEAR )
+        {
+            if( found )
+            {
+                log_error( "\tValue really found in image at %d,%d,%d (%s)\n", actualX, actualY, actualZ, ( found > 1 ) ? "NOT unique!!" : "unique" );
+            }
+            else
+            {
+                log_error( "\tValue not actually found in image\n" );
+            }
+        }
+        log_error( "\n" );
+        
+        numClamped = -1; // We force the clamped counter to never work
+        if( ( --numTries ) == 0 )
+            return -1;
+    }
+    return 0;
+}
+
+#define CLAMP( _val, _min, _max )           ((_val) < (_min) ? (_min) : (_val) > (_max) ? (_max) : (_val))
+
+// Produces one sampling coordinate per texel of a 2D image array, written to
+// xOffsets/yOffsets/zOffsets in x-fastest, then y, then array-slice order.
+//
+// Each coordinate starts as the texel index plus the matching *fract value.
+// Unless gDisableOffsets is set, a random integer from random_in_range( -10, 10, d )
+// is added to the index first (drawn in x, y, z order for every texel).
+// With CL_ADDRESS_NONE the coordinate is then clamped to [0, extent - 1], and
+// when normalized_coords is non-zero it is finally divided by the extent.
+// Every step is rounded to float before the next one is applied.
+static void InitFloatCoords( image_descriptor *imageInfo, image_sampler_data *imageSampler, float *xOffsets, float *yOffsets, float *zOffsets, float xfract, float yfract, float zfract, int normalized_coords, MTdata d )
+{
+    const bool clampToImage = ( imageSampler->addressing_mode == CL_ADDRESS_NONE );
+    const double lastX = (double) imageInfo->width - 1.0;
+    const double lastY = (double) imageInfo->height - 1.0;
+    const double lastZ = (double) imageInfo->arraySize - 1.0;
+    
+    size_t texel = 0;
+    for( size_t z = 0; z < imageInfo->arraySize; z++ )
+    {
+        for( size_t y = 0; y < imageInfo->height; y++ )
+        {
+            for( size_t x = 0; x < imageInfo->width; x++, texel++ )
+            {
+                float cx, cy, cz;
+                
+                // Base coordinate, optionally jittered by a random integer offset
+                if( gDisableOffsets )
+                {
+                    cx = (float) (xfract + (double) x);
+                    cy = (float) (yfract + (double) y);
+                    cz = (float) (zfract + (double) z);
+                }
+                else
+                {
+                    cx = (float) (xfract + (double) ((int) x + random_in_range( -10, 10, d )));
+                    cy = (float) (yfract + (double) ((int) y + random_in_range( -10, 10, d )));
+                    cz = (float) (zfract + (double) ((int) z + random_in_range( -10, 10, d )));
+                }
+                
+                // CL_ADDRESS_NONE: keep the coordinate inside the image
+                if( clampToImage )
+                {
+                    cx = (float) CLAMP( (double) cx, 0.0, lastX );
+                    cy = (float) CLAMP( (double) cy, 0.0, lastY );
+                    cz = (float) CLAMP( (double) cz, 0.0, lastZ );
+                }
+                
+                // Normalized sampling: scale by the image extent
+                if( normalized_coords )
+                {
+                    cx = (float) ((double) cx / (double) imageInfo->width);
+                    cy = (float) ((double) cy / (double) imageInfo->height);
+                    cz = (float) ((double) cz / (double) imageInfo->arraySize);
+                }
+                
+                xOffsets[ texel ] = cx;
+                yOffsets[ texel ] = cy;
+                zOffsets[ texel ] = cz;
+            }
+        }
+    }
+}
+
+#ifndef MAX   // only defined here when no earlier header has already provided MAX
+#define MAX(_a, _b)             ((_a) > (_b) ? (_a) : (_b))   // larger of two values; the chosen argument is evaluated twice, so avoid side effects
+#endif
+
+int test_read_image_2D_array( cl_device_id device, cl_context context, cl_command_queue queue, cl_kernel kernel,
+                       image_descriptor *imageInfo, image_sampler_data *imageSampler, 
+                       bool useFloatCoords, ExplicitType outputType, MTdata d )
+{
+    int error;
+    size_t threads[3];
+    static int initHalf = 0;
+    
+    clMemWrapper xOffsets, yOffsets, zOffsets, results;
+    clSamplerWrapper actualSampler;
+    BufferOwningPtr<char> maxImageUseHostPtrBackingStore;
+    
+    // Create offset data
+    BufferOwningPtr<cl_float> xOffsetValues(malloc(sizeof(cl_float) *imageInfo->width * imageInfo->height * imageInfo->arraySize));
+    BufferOwningPtr<cl_float> yOffsetValues(malloc(sizeof(cl_float) *imageInfo->width * imageInfo->height * imageInfo->arraySize));
+    BufferOwningPtr<cl_float> zOffsetValues(malloc(sizeof(cl_float) *imageInfo->width * imageInfo->height * imageInfo->arraySize));
+    
+    BufferOwningPtr<char> imageValues;
+    generate_random_image_data( imageInfo, imageValues, d );
+    
+    // Construct testing sources
+    clProtectedImage protImage;
+    clMemWrapper unprotImage;
+    cl_mem image;
+    
+    if( gMemFlagsToUse == CL_MEM_USE_HOST_PTR )
+    {
+        // clProtectedImage uses USE_HOST_PTR, so just rely on that for the testing (via Ian)
+        // Do not use protected images for max image size test since it rounds the row size to a page size
+        if (gTestMaxImages) {
+            generate_random_image_data( imageInfo, maxImageUseHostPtrBackingStore, d );
+            unprotImage = create_image_2d_array( context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, imageInfo->format, 
+                                          imageInfo->width, imageInfo->height, imageInfo->arraySize, ( gEnablePitch ? imageInfo->rowPitch : 0 ), ( gEnablePitch ? imageInfo->slicePitch : 0 ),
+                                          maxImageUseHostPtrBackingStore, &error );
+        } else {
+            error = protImage.Create( context, CL_MEM_OBJECT_IMAGE2D_ARRAY, (cl_mem_flags)(CL_MEM_READ_ONLY), imageInfo->format, imageInfo->width, imageInfo->height, 1, imageInfo->arraySize );
+        }
+        if( error != CL_SUCCESS )
+        {
+            log_error( "ERROR: Unable to create 2D image array of size %d x %d x %d (pitch %d, %d ) (%s)", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->arraySize, (int)imageInfo->rowPitch, (int)imageInfo->slicePitch, IGetErrorString( error ) );
+            return error;
+        }
+        if (gTestMaxImages)
+            image = (cl_mem)unprotImage;
+        else
+            image = (cl_mem)protImage;
+    }
+    else if( gMemFlagsToUse == CL_MEM_COPY_HOST_PTR )
+    {
+        // Don't use clEnqueueWriteImage; just use copy host ptr to get the data in
+        unprotImage = create_image_2d_array( context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, imageInfo->format, 
+                                      imageInfo->width, imageInfo->height, imageInfo->arraySize, ( gEnablePitch ? imageInfo->rowPitch : 0 ), ( gEnablePitch ? imageInfo->slicePitch : 0 ),
+                                      imageValues, &error );
+        if( error != CL_SUCCESS )
+        {
+            log_error( "ERROR: Unable to create 2D image array of size %d x %d x %d (pitch %d, %d ) (%s)", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->arraySize, (int)imageInfo->rowPitch, (int)imageInfo->slicePitch, IGetErrorString( error ) );
+            return error;
+        }
+        image = unprotImage;
+    }
+    else // Either CL_MEM_ALLOC_HOST_PTR or none
+    {
+        // Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can be accessed by the host, but otherwise
+        // it works just as if no flag is specified, so we just do the same thing either way
+        unprotImage = create_image_2d_array( context, CL_MEM_READ_ONLY | gMemFlagsToUse, imageInfo->format, 
+                                      imageInfo->width, imageInfo->height, imageInfo->arraySize,
+                                      ( gEnablePitch ? imageInfo->rowPitch : 0 ), ( gEnablePitch ? imageInfo->slicePitch : 0 ),
+                                      imageValues, &error );
+        if( error != CL_SUCCESS )
+        {
+            log_error( "ERROR: Unable to create 2D image array of size %d x %d x %d (pitch %d, %d ) (%s)", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->arraySize, (int)imageInfo->rowPitch, (int)imageInfo->slicePitch, IGetErrorString( error ) );
+            return error;
+        }
+        image = unprotImage;
+    }
+    
+    if( gMemFlagsToUse != CL_MEM_COPY_HOST_PTR )
+    {
+        if( gDebugTrace )
+            log_info( " - Writing image...\n" );
+        
+        size_t origin[ 3 ] = { 0, 0, 0 };
+        size_t region[ 3 ] = { imageInfo->width, imageInfo->height, imageInfo->arraySize };
+        
+        error = clEnqueueWriteImage(queue, image, CL_TRUE,
+                                    origin, region, gEnablePitch ? imageInfo->rowPitch : 0, gEnablePitch ? imageInfo->slicePitch : 0,
+                                    imageValues, 0, NULL, NULL);
+        if (error != CL_SUCCESS) 
+        {
+            log_error( "ERROR: Unable to write to 2D image array of size %d x %d x %d\n", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->arraySize );
+            return error;
+        }
+    }
+        
+    xOffsets = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), sizeof( cl_float ) * imageInfo->width * imageInfo->height * imageInfo->arraySize, xOffsetValues, &error );
+    test_error( error, "Unable to create x offset buffer" );
+    yOffsets = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), sizeof( cl_float ) * imageInfo->width * imageInfo->height * imageInfo->arraySize, yOffsetValues, &error );
+    test_error( error, "Unable to create y offset buffer" );
+    zOffsets = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), sizeof( cl_float ) * imageInfo->width * imageInfo->height * imageInfo->arraySize, zOffsetValues, &error );
+    test_error( error, "Unable to create y offset buffer" );
+    results = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  get_explicit_type_size( outputType ) * 4 * imageInfo->width * imageInfo->height * imageInfo->arraySize, NULL, &error );
+    test_error( error, "Unable to create result buffer" );
+    
+    // Create sampler to use
+    actualSampler = clCreateSampler( context, (cl_bool)imageSampler->normalized_coords, imageSampler->addressing_mode, imageSampler->filter_mode, &error );
+    test_error( error, "Unable to create image sampler" );
+    
+    // Set arguments
+    int idx = 0;
+    error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &image );
+    test_error( error, "Unable to set kernel arguments" );
+    if( !gUseKernelSamplers )
+    {
+        error = clSetKernelArg( kernel, idx++, sizeof( cl_sampler ), &actualSampler );
+        test_error( error, "Unable to set kernel arguments" );
+    }
+    error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &xOffsets );
+    test_error( error, "Unable to set kernel arguments" );
+    error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &yOffsets );
+    test_error( error, "Unable to set kernel arguments" );
+    error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &zOffsets );
+    test_error( error, "Unable to set kernel arguments" );
+    error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &results );
+    test_error( error, "Unable to set kernel arguments" );
+    
+    const float float_offsets[] = { 0.0f, MAKE_HEX_FLOAT(0x1.0p-30f, 0x1L, -30), 0.25f, 0.3f, 0.5f - FLT_EPSILON/4.0f, 0.5f, 0.9f, 1.0f - FLT_EPSILON/2 };
+    int float_offset_count = sizeof( float_offsets) / sizeof( float_offsets[0] );
+    int numTries = MAX_TRIES, numClamped = MAX_CLAMPED;
+    int loopCount = 2 * float_offset_count;
+    if( ! useFloatCoords )
+        loopCount = 1;
+    if (gTestMaxImages) {
+        loopCount = 1;
+        log_info("Testing each size only once with pixel offsets of %g for max sized images.\n", float_offsets[0]);
+    }  
+    
+    // Get the maximum absolute error for this format
+    double formatAbsoluteError = get_max_absolute_error(imageInfo->format, imageSampler); 
+    if (gDebugTrace) log_info("\tformatAbsoluteError is %e\n", formatAbsoluteError);
+    
+    if (0 == initHalf && imageInfo->format->image_channel_data_type == CL_HALF_FLOAT ) {
+        initHalf = CL_SUCCESS == DetectFloatToHalfRoundingMode( queue );
+        if (initHalf) {
+            log_info("Half rounding mode successfully detected.\n");
+        }
+    }
+
+    for( int q = 0; q < loopCount; q++ )
+    {
+        float offset = float_offsets[ q % float_offset_count ];
+        
+        // Init the coordinates
+        InitFloatCoords( imageInfo, imageSampler, xOffsetValues, yOffsetValues, zOffsetValues,
+                        q>=float_offset_count ? -offset: offset, 
+                        q>=float_offset_count ? offset: -offset, 
+                        q>=float_offset_count ? -offset: offset, 
+                        imageSampler->normalized_coords, d );
+                                                        
+        error = clEnqueueWriteBuffer( queue, xOffsets, CL_TRUE, 0, sizeof(cl_float) * imageInfo->height * imageInfo->width * imageInfo->arraySize, xOffsetValues, 0, NULL, NULL );
+        test_error( error, "Unable to write x offsets" );
+        error = clEnqueueWriteBuffer( queue, yOffsets, CL_TRUE, 0, sizeof(cl_float) * imageInfo->height * imageInfo->width * imageInfo->arraySize, yOffsetValues, 0, NULL, NULL );
+        test_error( error, "Unable to write y offsets" );
+        error = clEnqueueWriteBuffer( queue, zOffsets, CL_TRUE, 0, sizeof(cl_float) * imageInfo->height * imageInfo->width * imageInfo->arraySize, zOffsetValues, 0, NULL, NULL );
+        test_error( error, "Unable to write z offsets" );
+        
+        
+        size_t resultValuesSize = imageInfo->width * imageInfo->height * imageInfo->arraySize * get_explicit_type_size( outputType ) * 4;
+        BufferOwningPtr<char> resultValues(malloc( resultValuesSize ));
+        memset( resultValues, 0xff, resultValuesSize );
+        clEnqueueWriteBuffer( queue, results, CL_TRUE, 0, resultValuesSize, resultValues, 0, NULL, NULL );
+        
+        // Figure out thread dimensions
+        threads[0] = (size_t)imageInfo->width;
+        threads[1] = (size_t)imageInfo->height;
+        threads[2] = (size_t)imageInfo->arraySize;
+        
+        // Run the kernel
+        error = clEnqueueNDRangeKernel( queue, kernel, 3, NULL, threads, NULL, 0, NULL, NULL );
+        test_error( error, "Unable to run kernel" );
+        
+        // Get results
+        error = clEnqueueReadBuffer( queue, results, CL_TRUE, 0, imageInfo->width * imageInfo->height * imageInfo->arraySize * get_explicit_type_size( outputType ) * 4, resultValues, 0, NULL, NULL );
+        test_error( error, "Unable to read results from kernel" );
+        if( gDebugTrace )
+            log_info( "    results read\n" );
+        
+        // Validate results element by element
+        char *imagePtr = imageValues;
+        /*
+         * FLOAT output type
+         */         
+        if( outputType == kFloat )
+        {
+            // Validate float results
+            float *resultPtr = (float *)(char *)resultValues;
+            float expected[4], error=0.0f;
+            float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 1 /*3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode ); 
+            
+            for( size_t z = 0, j = 0; z < imageInfo->arraySize; z++ )
+            {
+                for( size_t y = 0; y < imageInfo->height; y++ )
+                {
+                    for( size_t x = 0; x < imageInfo->width; x++, j++ )
+                    {
+                        // Step 1: go through and see if the results verify for the pixel
+                        // For the normalized case on a GPU we put in offsets to the X, Y and Z to see if we land on the
+                        // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
+                        int checkOnlyOnePixel = 0;
+                        int found_pixel = 0;
+                        float offset = NORM_OFFSET;
+                        if (!imageSampler->normalized_coords ||  imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0
+#if defined( __APPLE__ )
+                            // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes
+                            || gDeviceType != CL_DEVICE_TYPE_GPU 
+#endif
+                            ) 
+                            offset = 0.0f;          // Loop only once
+                        
+                        for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel ; norm_offset_x += NORM_OFFSET) {
+                            for (float norm_offset_y = -offset; norm_offset_y <= offset && !found_pixel ; norm_offset_y += NORM_OFFSET) {
+                                for (float norm_offset_z = -offset; norm_offset_z <= NORM_OFFSET && !found_pixel; norm_offset_z += NORM_OFFSET) {                                    
+                                    
+                                    int hasDenormals = 0;
+                                    FloatPixel maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo, 
+                                                                                          xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
+                                                                                          norm_offset_x, norm_offset_y, norm_offset_z,
+                                                                                          imageSampler, expected, 0, &hasDenormals );
+                                    
+                                    float err1 = fabsf( resultPtr[0] - expected[0] );
+                                    float err2 = fabsf( resultPtr[1] - expected[1] );
+                                    float err3 = fabsf( resultPtr[2] - expected[2] );
+                                    float err4 = fabsf( resultPtr[3] - expected[3] );
+                                    // Clamp to the minimum absolute error for the format
+                                    if (err1 > 0 && err1 < formatAbsoluteError) { err1 = 0.0f; }
+                                    if (err2 > 0 && err2 < formatAbsoluteError) { err2 = 0.0f; }
+                                    if (err3 > 0 && err3 < formatAbsoluteError) { err3 = 0.0f; }
+                                    if (err4 > 0 && err4 < formatAbsoluteError) { err4 = 0.0f; }           
+                                    float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN );
+                                    float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN );
+                                    float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN );
+                                    float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN );
+                                    
+                                    if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2)    || ! (err3 <= maxErr3) || ! (err4 <= maxErr4) )
+                                    {           
+                                        // Try flushing the denormals
+                                        if( hasDenormals )
+                                        {
+                                            // If implementation decide to flush subnormals to zero, 
+                                            // max error needs to be adjusted                                                 
+                                            maxErr1 += 4 * FLT_MIN;
+                                            maxErr2 += 4 * FLT_MIN;
+                                            maxErr3 += 4 * FLT_MIN;
+                                            maxErr4 += 4 * FLT_MIN;
+    
+                                            maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo, 
+                                                                                       xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
+                                                                                       norm_offset_x, norm_offset_y, norm_offset_z,
+                                                                                       imageSampler, expected, 0, NULL );
+                                            
+                                            err1 = fabsf( resultPtr[0] - expected[0] );
+                                            err2 = fabsf( resultPtr[1] - expected[1] );
+                                            err3 = fabsf( resultPtr[2] - expected[2] );
+                                            err4 = fabsf( resultPtr[3] - expected[3] );
+                                        }
+                                    }
+                                    
+                                    found_pixel = (err1 <= maxErr1) && (err2 <= maxErr2)  && (err3 <= maxErr3) && (err4 <= maxErr4);
+                                }//norm_offset_z
+                            }//norm_offset_y
+                        }//norm_offset_x
+                        
+                        // Step 2: If we did not find a match, then print out debugging info.
+                        if (!found_pixel) {
+                            // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
+                            // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
+                            checkOnlyOnePixel = 0;
+                            int shouldReturn = 0;
+                            for (float norm_offset_x = -offset; norm_offset_x <= offset && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
+                                for (float norm_offset_y = -offset; norm_offset_y <= offset && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
+                                    for (float norm_offset_z = -offset; norm_offset_z <= offset && !checkOnlyOnePixel; norm_offset_z += NORM_OFFSET) {
+                                        
+                                        int hasDenormals = 0;
+                                        FloatPixel maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo, 
+                                                                                              xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
+                                                                                              norm_offset_x, norm_offset_y, norm_offset_z,
+                                                                                              imageSampler, expected, 0, &hasDenormals );
+                                        
+                                        float err1 = fabsf( resultPtr[0] - expected[0] );
+                                        float err2 = fabsf( resultPtr[1] - expected[1] );
+                                        float err3 = fabsf( resultPtr[2] - expected[2] );
+                                        float err4 = fabsf( resultPtr[3] - expected[3] );
+                                        float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN );
+                                        float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN );
+                                        float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN );
+                                        float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN );
+                                        
+                                        
+                                        if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2)    || ! (err3 <= maxErr3) || ! (err4 <= maxErr4) )
+                                        {
+                                            // Try flushing the denormals
+                                            if( hasDenormals )
+                                            {
+                                                maxErr1 += 4 * FLT_MIN;
+                                                maxErr2 += 4 * FLT_MIN;
+                                                maxErr3 += 4 * FLT_MIN;
+                                                maxErr4 += 4 * FLT_MIN;
+                                            
+                                                maxPixel = sample_image_pixel_float( imageValues, imageInfo, 
+                                                                                    xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
+                                                                                    imageSampler, expected, 0, NULL );
+                                                
+                                                err1 = fabsf( resultPtr[0] - expected[0] );
+                                                err2 = fabsf( resultPtr[1] - expected[1] );
+                                                err3 = fabsf( resultPtr[2] - expected[2] );
+                                                err4 = fabsf( resultPtr[3] - expected[3] );
+                                            }
+                                        }
+                                        
+                                        if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2)    || ! (err3 <= maxErr3) || ! (err4 <= maxErr4) )
+                                        {
+                                            log_error("FAILED norm_offsets: %g , %g , %g:\n", norm_offset_x, norm_offset_y, norm_offset_z);
+                                            
+                                            float tempOut[4];
+                                            shouldReturn |= determine_validation_error_offset_2D_array<float>( imagePtr, imageInfo, imageSampler, resultPtr,
+                                                                                                     expected, error, xOffsetValues[j], yOffsetValues[j], zOffsetValues[j], 
+                                                                                                     norm_offset_x, norm_offset_y, norm_offset_z, j, 
+                                                                                                     numTries, numClamped, true );
+                                            log_error( "Step by step:\n" );
+                                            FloatPixel temp = sample_image_pixel_float_offset( imageValues, imageInfo, 
+                                                                                              xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
+                                                                                              norm_offset_x, norm_offset_y, norm_offset_z,
+                                                                                              imageSampler, tempOut, 1 /*verbose*/, &hasDenormals);
+                                            log_error( "\tulps: %2.2f, %2.2f, %2.2f, %2.2f  (max allowed: %2.2f)\n\n",    
+                                                      Ulp_Error( resultPtr[0], expected[0] ),
+                                                      Ulp_Error( resultPtr[1], expected[1] ),
+                                                      Ulp_Error( resultPtr[2], expected[2] ),
+                                                      Ulp_Error( resultPtr[3], expected[3] ),
+                                                      Ulp_Error( MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) + maxErr, MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) ) );
+                                        } else {
+                                            log_error("Test error: we should have detected this passing above.\n");
+                                        }
+                                    }//norm_offset_z
+                                }//norm_offset_y
+                            }//norm_offset_x
+                            if( shouldReturn )
+                                return 1;
+                        } // if (!found_pixel)
+                        
+                        resultPtr += 4;
+                    }
+                }
+            }
+        }
+        /*
+         * UINT output type
+         */                     
+        else if( outputType == kUInt )
+        {
+            // Validate unsigned integer results
+            unsigned int *resultPtr = (unsigned int *)(char *)resultValues;
+            unsigned int expected[4];
+            float error;
+            for( size_t z = 0, j = 0; z < imageInfo->arraySize; z++ )
+            {
+                for( size_t y = 0; y < imageInfo->height; y++ )
+                {
+                    for( size_t x = 0; x < imageInfo->width; x++, j++ )
+                    {
+                        // Step 1: go through and see if the results verify for the pixel
+                        // For the normalized case on a GPU we put in offsets to the X, Y and Z to see if we land on the
+                        // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
+                        int checkOnlyOnePixel = 0;
+                        int found_pixel = 0;
+                        for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
+                            for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
+                                for (float norm_offset_z = -NORM_OFFSET; norm_offset_z <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_z += NORM_OFFSET) {
+                                    
+                                    // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
+                                    // E.g., test one pixel.
+                                    if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
+                                        norm_offset_x = 0.0f;
+                                        norm_offset_y = 0.0f;
+                                        norm_offset_z = 0.0f;
+                                        checkOnlyOnePixel = 1;
+                                    }
+                                    
+                                    sample_image_pixel_offset<unsigned int>( imageValues, imageInfo, 
+                                                                            xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
+                                                                            norm_offset_x, norm_offset_y, norm_offset_z,
+                                                                            imageSampler, expected );
+                                    
+                                    error = errMax( errMax( abs_diff_uint(expected[ 0 ], resultPtr[ 0 ]), abs_diff_uint(expected[ 1 ], resultPtr[ 1 ]) ),
+                                                   errMax( abs_diff_uint(expected[ 2 ], resultPtr[ 2 ]), abs_diff_uint(expected[ 3 ], resultPtr[ 3 ]) ) );
+                                    
+                                    if (error < MAX_ERR)
+                                        found_pixel = 1;
+                                }//norm_offset_z
+                            }//norm_offset_y
+                        }//norm_offset_x
+                        
+                        // Step 2: If we did not find a match, then print out debugging info.
+                        if (!found_pixel) {
+                            // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
+                            // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
+                            checkOnlyOnePixel = 0;
+                            int shouldReturn = 0;
+                            for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
+                                for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
+                                    for (float norm_offset_z = -NORM_OFFSET; norm_offset_z <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_z += NORM_OFFSET) {
+                                        
+                                        // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
+                                        // E.g., test one pixel.
+                                        if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
+                                            norm_offset_x = 0.0f;
+                                            norm_offset_y = 0.0f;
+                                            norm_offset_z = 0.0f;
+                                            checkOnlyOnePixel = 1;
+                                        }
+                                        
+                                        sample_image_pixel_offset<unsigned int>( imageValues, imageInfo, 
+                                                                                xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
+                                                                                norm_offset_x, norm_offset_y, norm_offset_z,
+                                                                                imageSampler, expected );
+                                        
+                                        error = errMax( errMax( abs_diff_uint(expected[ 0 ], resultPtr[ 0 ]), abs_diff_uint(expected[ 1 ], resultPtr[ 1 ]) ),
+                                                       errMax( abs_diff_uint(expected[ 2 ], resultPtr[ 2 ]), abs_diff_uint(expected[ 3 ], resultPtr[ 3 ]) ) );
+                                        
+                                        if( error > MAX_ERR )
+                                        {
+                                            log_error("FAILED norm_offsets: %g , %g , %g:\n", norm_offset_x, norm_offset_y, norm_offset_z);
+                                            shouldReturn |=  determine_validation_error_offset_2D_array<unsigned int>( imagePtr, imageInfo, imageSampler, resultPtr,
+                                                                                                             expected, error, xOffsetValues[j], yOffsetValues[j], zOffsetValues[j], 
+                                                                                                             norm_offset_x, norm_offset_y, norm_offset_z,
+                                                                                                             j, numTries, numClamped, false );
+                                        } else {
+                                            log_error("Test error: we should have detected this passing above.\n");
+                                        }
+                                    }//norm_offset_z
+                                }//norm_offset_y
+                            }//norm_offset_x
+                            if( shouldReturn )
+                                return 1;
+                        } // if (!found_pixel)
+                        
+                        resultPtr += 4;
+                    }
+                }
+            }
+        }
+        else
+        /*
+         * INT output type
+         */                                         
+        {
+            // Validate integer results
+            int *resultPtr = (int *)(char *)resultValues;
+            int expected[4];
+            float error;
+            for( size_t z = 0, j = 0; z < imageInfo->arraySize; z++ )
+            {
+                for( size_t y = 0; y < imageInfo->height; y++ )
+                {
+                    for( size_t x = 0; x < imageInfo->width; x++, j++ )
+                    {
+                        // Step 1: go through and see if the results verify for the pixel
+                        // For the normalized case on a GPU we put in offsets to the X, Y and Z to see if we land on the
+                        // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
+                        int checkOnlyOnePixel = 0;
+                        int found_pixel = 0;
+                        for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
+                            for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
+                                for (float norm_offset_z = -NORM_OFFSET; norm_offset_z <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_z += NORM_OFFSET) {
+                                    
+                                    // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
+                                    // E.g., test one pixel.
+                                    if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
+                                        norm_offset_x = 0.0f;
+                                        norm_offset_y = 0.0f;
+                                        norm_offset_z = 0.0f;
+                                        checkOnlyOnePixel = 1;
+                                    }
+                                    
+                                    sample_image_pixel_offset<int>( imageValues, imageInfo, 
+                                                                   xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
+                                                                   norm_offset_x, norm_offset_y, norm_offset_z,
+                                                                   imageSampler, expected );
+                                    
+                                    error = errMax( errMax( abs_diff_int(expected[ 0 ], resultPtr[ 0 ]), abs_diff_int(expected[ 1 ], resultPtr[ 1 ]) ),
+                                                   errMax( abs_diff_int(expected[ 2 ], resultPtr[ 2 ]), abs_diff_int(expected[ 3 ], resultPtr[ 3 ]) ) );
+                                    
+                                    if (error < MAX_ERR)
+                                        found_pixel = 1;
+                                }//norm_offset_z
+                            }//norm_offset_y
+                        }//norm_offset_x
+                        
+                        // Step 2: If we did not find a match, then print out debugging info.
+                        if (!found_pixel) {
+                            // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
+                            // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
+                            checkOnlyOnePixel = 0;
+                            int shouldReturn = 0;
+                            for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
+                                for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
+                                    for (float norm_offset_z = -NORM_OFFSET; norm_offset_z <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_z += NORM_OFFSET) {
+                                        
+                                        // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
+                                        // E.g., test one pixel.
+                                        if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0 || NORM_OFFSET == 0 || NORM_OFFSET == 0) {
+                                            norm_offset_x = 0.0f;
+                                            norm_offset_y = 0.0f;
+                                            norm_offset_z = 0.0f;
+                                            checkOnlyOnePixel = 1;
+                                        }
+                                        
+                                        sample_image_pixel_offset<int>( imageValues, imageInfo, 
+                                                                       xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
+                                                                       norm_offset_x, norm_offset_y, norm_offset_z,
+                                                                       imageSampler, expected );
+                                        
+                                        error = errMax( errMax( abs_diff_int(expected[ 0 ], resultPtr[ 0 ]), abs_diff_int(expected[ 1 ], resultPtr[ 1 ]) ),
+                                                       errMax( abs_diff_int(expected[ 2 ], resultPtr[ 2 ]), abs_diff_int(expected[ 3 ], resultPtr[ 3 ]) ) );
+                                        
+                                        if( error > MAX_ERR )
+                                        {
+                                            log_error("FAILED norm_offsets: %g , %g , %g:\n", norm_offset_x, norm_offset_y, norm_offset_z);
+                                            shouldReturn |=  determine_validation_error_offset_2D_array<int>( imagePtr, imageInfo, imageSampler, resultPtr,
+                                                                                                    expected, error, xOffsetValues[j], yOffsetValues[j], zOffsetValues[j], 
+                                                                                                    norm_offset_x, norm_offset_y, norm_offset_z,
+                                                                                                    j, numTries, numClamped, false );
+                                        } else {
+                                            log_error("Test error: we should have detected this passing above.\n");
+                                        }
+                                    }//norm_offset_z
+                                }//norm_offset_y
+                            }//norm_offset_x
+                            if( shouldReturn )
+                                return 1;
+                        } // if (!found_pixel)
+                        
+                        resultPtr += 4;
+                    }
+                }
+            }
+        }
+    }
+    
+    return numTries != MAX_TRIES || numClamped != MAX_CLAMPED;
+}
+
+// Builds the 2D-array read kernel for one format / sampler / output type and runs
+// test_read_image_2D_array over image sizes picked by the global mode flags: small sizes,
+// device maximums, one rounding-specific size, or NUM_IMAGE_ITERATIONS random sizes.
+// Returns 0 if every size passes; a sub-test's non-zero code is returned immediately.
+int test_read_image_set_2D_array( cl_device_id device, cl_image_format *format, image_sampler_data *imageSampler, 
+                           bool floatCoords, ExplicitType outputType )
+{
+    char programSrc[10240];
+    const char *ptr;
+    const char *readFormat;
+    RandomSeed seed( gRandomSeed );
+    int error;
+    clProgramWrapper program;
+    clKernelWrapper kernel;
+
+    // Get operating parameters
+    size_t maxWidth, maxHeight, maxArraySize;
+    cl_ulong maxAllocSize, memSize;
+    image_descriptor imageInfo = { 0x0 };
+    size_t pixelSize;
+
+    imageInfo.format = format;
+    imageInfo.type = CL_MEM_OBJECT_IMAGE2D_ARRAY;
+    pixelSize = get_pixel_size( imageInfo.format );
+
+    error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL );
+    error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL );
+    error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, sizeof( maxArraySize ), &maxArraySize, NULL );
+    error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL );
+    error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
+    test_error( error, "Unable to get max image 2D array size from device" );
+
+    // Determine types
+    if( outputType == kInt )
+        readFormat = "i";
+    else if( outputType == kUInt )
+        readFormat = "ui";
+    else // kFloat
+        readFormat = "f";
+
+    // The sampler is a kernel argument unless gUseKernelSamplers asks for sampler source text instead
+    const char *samplerArg = samplerKernelArg;
+    char samplerVar[ 1024 ] = "";
+    if( gUseKernelSamplers )
+    {
+        get_sampler_kernel_code( imageSampler, samplerVar );
+        samplerArg = "";
+    }
+
+    // Construct the source
+    sprintf( programSrc, read2DArrayKernelSourcePattern, samplerArg, get_explicit_type_name( outputType ),
+            samplerVar,
+            floatCoords ? float2DArrayUnnormalizedCoordKernelSource : int2DArrayCoordKernelSource,
+            readFormat );
+
+    ptr = programSrc;
+    error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "sample_kernel" );
+    test_error( error, "Unable to create testing kernel" );
+
+    // Run tests
+    if( gTestSmallImages )
+    {
+        for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
+        {
+            imageInfo.rowPitch = imageInfo.width * pixelSize;
+            for( imageInfo.height = 1; imageInfo.height < 9; imageInfo.height++ )
+            {
+                imageInfo.slicePitch = imageInfo.rowPitch * imageInfo.height;
+                for( imageInfo.arraySize = 2; imageInfo.arraySize < 9; imageInfo.arraySize++ )
+                {
+                    if( gDebugTrace )
+                        log_info( "   at size %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.arraySize );
+                    int retCode = test_read_image_2D_array( device, context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
+                    if( retCode )
+                        return retCode;
+                }
+            }
+        }
+    }
+    else if( gTestMaxImages )
+    {
+        // Try a specific set of maximum sizes
+        size_t numberOfSizes;
+        size_t sizes[100][3];
+        get_max_sizes(&numberOfSizes, 100, sizes, maxWidth, maxHeight, 1, maxArraySize, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE2D_ARRAY, imageInfo.format);
+
+        for( size_t idx = 0; idx < numberOfSizes; idx++ )
+        {
+            imageInfo.width = sizes[ idx ][ 0 ];
+            imageInfo.height = sizes[ idx ][ 1 ];
+            imageInfo.arraySize = sizes[ idx ][ 2 ];
+            imageInfo.rowPitch = imageInfo.width * pixelSize;
+            imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
+            log_info("Testing %d x %d x %d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ]);
+            if( gDebugTrace )
+                log_info( "   at max size %d,%d,%d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ] );
+            int retCode = test_read_image_2D_array( device, context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
+            if( retCode )
+                return retCode;
+        }
+    }
+    else if( gTestRounding )
+    {
+        // Shift in 64 bits: a 4-byte channel would otherwise shift a 32-bit int by 32 (undefined),
+        // which can leave height at 0 and make the width computation below divide by zero.
+        cl_ulong typeRange = (cl_ulong)1 << ( get_format_type_size( imageInfo.format ) * 8 );
+        imageInfo.height = (size_t)( typeRange / 256 );
+        imageInfo.width = (size_t)( typeRange / (cl_ulong)imageInfo.height );
+        imageInfo.arraySize = 2;
+        imageInfo.rowPitch = imageInfo.width * pixelSize;
+        imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
+        int retCode = test_read_image_2D_array( device, context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
+        if( retCode )
+            return retCode;
+    }
+    else
+    {
+        for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
+        {
+            cl_ulong size;
+            // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
+            // image, the result array, plus offset arrays, will fit in the global ram space
+            do
+            {
+                imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 128, seed );
+                imageInfo.height = (size_t)random_log_in_range( 16, (int)maxHeight / 128, seed );
+                imageInfo.arraySize = (size_t)random_log_in_range( 16, (int)maxArraySize / 32, seed );
+
+                imageInfo.rowPitch = imageInfo.width * pixelSize;
+                imageInfo.slicePitch = imageInfo.rowPitch * imageInfo.height;
+
+                if( gEnablePitch )
+                {
+                    size_t extraWidth = (int)random_log_in_range( 0, 64, seed );
+                    imageInfo.rowPitch += extraWidth * pixelSize;
+
+                    size_t extraHeight = (int)random_log_in_range( 0, 64, seed );
+                    imageInfo.slicePitch = imageInfo.rowPitch * (imageInfo.height + extraHeight);
+                }
+
+                size = (cl_ulong)imageInfo.slicePitch * (cl_ulong)imageInfo.arraySize * 4 * 4;
+            } while(  size > maxAllocSize || ( size * 3 ) > memSize );
+
+            if( gDebugTrace )
+                log_info( "   at size %d,%d,%d (pitch %d,%d) out of %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.arraySize, (int)imageInfo.rowPitch, (int)imageInfo.slicePitch, (int)maxWidth, (int)maxHeight, (int)maxArraySize );
+            int retCode = test_read_image_2D_array( device, context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
+            if( retCode )
+                return retCode;
+        }
+    }
+
+    return 0;
+}
--- a/test_conformance/images/kernel_read_write/test_read_3D.cpp
+++ b/test_conformance/images/kernel_read_write/test_read_3D.cpp
@@ -0,0 +1,966 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../testBase.h"
+#include <float.h>
+
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+
+extern cl_command_queue queue;
+extern cl_context context;
+extern bool			gDebugTrace, gExtraValidateInfo, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestRounding;
+extern cl_device_type	gDeviceType;
+extern bool			gUseKernelSamplers;
+extern cl_filter_mode	gFilterModeToUse;
+extern cl_addressing_mode	gAddressModeToUse;
+extern cl_mem_flags gMemFlagsToUse;
+
+#define MAX_TRIES               1   // starting value of numTries in test_read_image_3D
+#define MAX_CLAMPED             1   // starting value of numClamped in test_read_image_3D
+
+// Format template for the sampling kernel (five %s slots, filled in where the program is built); each work-item writes results[] at its x-fastest linear texel index.
+const char *read3DKernelSourcePattern = 
+"__kernel void sample_kernel( read_only image3d_t input,%s __global float *xOffsets, __global float *yOffsets, __global float *zOffsets,  __global %s4 *results )\n"
+"{\n"
+"%s"
+"   int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = get_global_id(2);\n"
+"   int offset = tidZ*get_image_width(input)*get_image_height(input) + tidY*get_image_width(input) + tidX;\n"
+"%s"
+"   results[offset] = read_image%s( input, imageSampler, coords );\n"
+"}";
+// Coordinate snippet that casts the three float offsets to int and packs them into an int4.
+const char *int3DCoordKernelSource = 
+"   int4 coords = (int4)( (int) xOffsets[offset], (int) yOffsets[offset], (int) zOffsets[offset], 0 );\n";
+// Coordinate snippet that forwards the three float offsets unchanged as a float4.
+const char *float3DUnnormalizedCoordKernelSource = 
+"   float4 coords = (float4)( xOffsets[offset], yOffsets[offset], zOffsets[offset], 0.0f );\n";
+
+// Kernel parameter text that declares imageSampler as a sampler_t argument (presumably fills the first %s slot above - confirm at the call site).
+static const char *samplerKernelArg = " sampler_t imageSampler,";
+
+#define ABS_ERROR( result, expected ) ( fabsf( (float)(expected) - (float)(result) ) ) // |expected - result| as float; parameters are parenthesized so compound arguments are cast as a whole
+
+extern void read_image_pixel_float( void *imageData, image_descriptor *imageInfo, int x, int y, int z, float *outData );
+// Reports a sample that failed validation and decides whether the test must abort.
+// imagePtr/imageInfo describe the host copy of the image and imageSampler the sampler
+// under test; resultPtr and expected hold the four channels read back and computed.
+// x, y, z and the three *AddressOffset values are the coordinate that was sampled;
+// j (sample index, cast to long to match %ld) and error are only printed.
+// numTries and numClamped are caller-owned countdowns: the function returns -1 when
+// either one reaches zero and 0 otherwise. printAsFloat picks %g over %x output.
+template <class T> int determine_validation_error_offset( void *imagePtr, image_descriptor *imageInfo, image_sampler_data *imageSampler,
+                                                         T *resultPtr, T * expected, float error,
+                                                         float x, float y, float z, float xAddressOffset, float yAddressOffset, float zAddressOffset, size_t j, int &numTries, int &numClamped, bool printAsFloat )
+{
+    int actualX, actualY, actualZ;
+    int found = debug_find_pixel_in_image( imagePtr, imageInfo, resultPtr, &actualX, &actualY, &actualZ );
+    bool clampingErr = false, clamped = false, otherClampingBug = false;
+    int clampedX, clampedY, clampedZ;
+
+    size_t imageWidth = imageInfo->width, imageHeight = imageInfo->height, imageDepth = imageInfo->depth;
+
+    clamped = get_integer_coords_offset( x, y, z, xAddressOffset, yAddressOffset, zAddressOffset, imageWidth, imageHeight, imageDepth, imageSampler, imageInfo, clampedX, clampedY, clampedZ );
+
+    if( found )
+    {
+        // Is it a clamping bug? The returned value sits exactly at the resolved integer coordinate.
+        if( clamped && clampedX == actualX && clampedY == actualY && clampedZ == actualZ )
+        {
+            if( (--numClamped) == 0 )
+            {
+                if( printAsFloat )
+                {
+                    log_error( "Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not validate! Expected (%g,%g,%g,%g), got (%g,%g,%g,%g), error of %g\n",
+                              (long)j, x, x, y, y, z, z, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ],
+                              (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error );
+                }
+                else
+                {
+                    log_error( "Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not validate! Expected (%x,%x,%x,%x), got (%x,%x,%x,%x)\n",
+                              (long)j, x, x, y, y, z, z, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ],
+                              (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] );
+                }
+                log_error( "ERROR: TEST FAILED: Read is erroneously clamping coordinates!\n" );
+                return -1;
+            }
+            clampingErr = true;
+            otherClampingBug = true;
+        }
+    }
+    if( clamped && !otherClampingBug )
+    {
+        // The coordinate was clamped and all four channels came back zero, so the
+        // border color may have been returned erroneously.
+        if( resultPtr[ 0 ] == 0 && resultPtr[ 1 ] == 0 && resultPtr[ 2 ] == 0 && resultPtr[ 3 ] == 0 )
+        {
+            if( (--numClamped) == 0 )
+            {
+                if( printAsFloat )
+                {
+                    log_error( "Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not validate! Expected (%g,%g,%g,%g), got (%g,%g,%g,%g), error of %g\n",
+                              (long)j, x, x, y, y, z, z, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ],
+                              (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error );
+                }
+                else
+                {
+                    log_error( "Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not validate! Expected (%x,%x,%x,%x), got (%x,%x,%x,%x)\n",
+                              (long)j, x, x, y, y, z, z, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ],
+                              (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] );
+                }
+                log_error( "ERROR: TEST FAILED: Clamping is erroneously returning border color!\n" );
+                return -1;
+            }
+            clampingErr = true;
+        }
+    }
+    if( !clampingErr )
+    {
+        // Not classified as a clamping problem: always print the full diagnostic.
+        if( true ) // gExtraValidateInfo )
+        {
+            if( printAsFloat )
+            {
+                log_error( "Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not validate!\n\tExpected (%g,%g,%g,%g),\n\t     got (%g,%g,%g,%g), error of %g\n",
+                          (long)j, x, x, y, y, z, z, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ],
+                          (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error );
+            }
+            else
+            {
+                log_error( "Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not validate!\n\tExpected (%x,%x,%x,%x),\n\t     got (%x,%x,%x,%x)\n",
+                          (long)j, x, x, y, y, z, z, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ],
+                          (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] );
+            }
+            log_error( "Integer coords resolve to %d,%d,%d   with img size %d,%d,%d\n", clampedX, clampedY, clampedZ, (int)imageWidth, (int)imageHeight, (int)imageDepth );
+
+            if( printAsFloat && gExtraValidateInfo )
+            {
+                log_error( "\nNearby values:\n" );
+                for( int zOff = -1; zOff <= 1; zOff++ )
+                {
+                    for( int yOff = -1; yOff <= 1; yOff++ )
+                    {
+                        float top[ 4 ], real[ 4 ], bot[ 4 ];
+                        read_image_pixel_float( imagePtr, imageInfo, clampedX - 1 , clampedY + yOff, clampedZ + zOff, top );
+                        read_image_pixel_float( imagePtr, imageInfo, clampedX ,clampedY + yOff, clampedZ + zOff, real );
+                        read_image_pixel_float( imagePtr, imageInfo, clampedX + 1, clampedY + yOff, clampedZ + zOff, bot );
+                        log_error( "\t(%g,%g,%g,%g)",top[0], top[1], top[2], top[3] );
+                        log_error( " (%g,%g,%g,%g)", real[0], real[1], real[2], real[3] );
+                        log_error( " (%g,%g,%g,%g)\n",bot[0], bot[1], bot[2], bot[3] );
+                    }
+                }
+            }
+            if( imageSampler->filter_mode != CL_FILTER_LINEAR )
+            {
+                if( found )
+                    log_error( "\tValue really found in image at %d,%d,%d (%s)\n", actualX, actualY, actualZ, ( found > 1 ) ? "NOT unique!!" : "unique" );
+                else
+                    log_error( "\tValue not actually found in image\n" );
+            }
+            log_error( "\n" );
+        }
+
+        numClamped = -1; // We force the clamped counter to never work
+        if( ( --numTries ) == 0 )
+            return -1;
+    }
+    return 0;
+}
+
+#define CLAMP( _val, _min, _max )           ((_val) < (_min) ? (_min) : (_val) > (_max) ? (_max) : (_val)) // limits _val to [_min, _max]; arguments may be evaluated more than once
+
+// Builds the per-texel sample coordinates for the 3D read test.
+// xOffsets, yOffsets and zOffsets each receive width * height * depth floats, laid out
+// x-fastest, then y, then z. Each entry starts as the texel index plus the matching
+// *fract value and, unless gDisableOffsets is set, an integer jitter obtained from
+// random_in_range( -10, 10, d ). When the sampler uses CL_ADDRESS_NONE the values are
+// then clamped to [0, dimension - 1]; when normalized_coords is non-zero they are
+// finally divided by the image dimension.
+// The function only writes the three offset arrays; imageInfo and imageSampler are read.
+static void InitFloatCoords( image_descriptor *imageInfo, image_sampler_data *imageSampler, float *xOffsets, float *yOffsets, float *zOffsets, float xfract, float yfract, float zfract, int normalized_coords, MTdata d )
+{
+    const size_t width = imageInfo->width;
+    const size_t height = imageInfo->height;
+    const size_t depth = imageInfo->depth;
+    const size_t texelCount = width * height * depth;
+
+    // Pass 1: raw coordinates. The three jitter draws stay in x, y, z order for every
+    // texel so the values pulled from the generator land in the same slots.
+    size_t idx = 0;
+    for( size_t z = 0; z < depth; z++ )
+    {
+        for( size_t y = 0; y < height; y++ )
+        {
+            for( size_t x = 0; x < width; x++, idx++ )
+            {
+                if( gDisableOffsets )
+                {
+                    xOffsets[ idx ] = (float) (xfract + (double) x);
+                    yOffsets[ idx ] = (float) (yfract + (double) y);
+                    zOffsets[ idx ] = (float) (zfract + (double) z);
+                }
+                else
+                {
+                    xOffsets[ idx ] = (float) (xfract + (double) ((int) x + random_in_range( -10, 10, d )));
+                    yOffsets[ idx ] = (float) (yfract + (double) ((int) y + random_in_range( -10, 10, d )));
+                    zOffsets[ idx ] = (float) (zfract + (double) ((int) z + random_in_range( -10, 10, d )));
+                }
+            }
+        }
+    }
+
+    // Pass 2: keep every coordinate inside the image when the sampler uses CL_ADDRESS_NONE.
+    if( imageSampler->addressing_mode == CL_ADDRESS_NONE )
+    {
+        const double maxX = (double) width - 1.0;
+        const double maxY = (double) height - 1.0;
+        const double maxZ = (double) depth - 1.0;
+        for( size_t n = 0; n < texelCount; n++ )
+        {
+            xOffsets[ n ] = (float) CLAMP( (double) xOffsets[ n ], 0.0, maxX );
+            yOffsets[ n ] = (float) CLAMP( (double) yOffsets[ n ], 0.0, maxY );
+            zOffsets[ n ] = (float) CLAMP( (double) zOffsets[ n ], 0.0, maxZ );
+        }
+    }
+
+    // Pass 3: rescale to normalized coordinates by dividing by the image dimensions.
+    if( normalized_coords )
+    {
+        const double widthD = (double) width;
+        const double heightD = (double) height;
+        const double depthD = (double) depth;
+        for( size_t n = 0; n < texelCount; n++ )
+        {
+            xOffsets[ n ] = (float) ((double) xOffsets[ n ] / widthD);
+            yOffsets[ n ] = (float) ((double) yOffsets[ n ] / heightD);
+            zOffsets[ n ] = (float) ((double) zOffsets[ n ] / depthD);
+        }
+    }
+}
+
+#ifndef MAX // only defined here when nothing earlier has already defined MAX
+#define MAX(_a, _b)             ((_a) > (_b) ? (_a) : (_b)) // larger of the two; the selected argument is evaluated twice
+#endif
+
+int test_read_image_3D( cl_device_id device, cl_context context, cl_command_queue queue, cl_kernel kernel,
+                       image_descriptor *imageInfo, image_sampler_data *imageSampler, 
+                       bool useFloatCoords, ExplicitType outputType, MTdata d )
+{
+    int error;
+    size_t threads[3];
+    static int initHalf = 0;
+    
+    clMemWrapper xOffsets, yOffsets, zOffsets, results;
+    clSamplerWrapper actualSampler;
+    BufferOwningPtr<char> maxImageUseHostPtrBackingStore;
+    
+    // Create offset data
+    BufferOwningPtr<cl_float> xOffsetValues(malloc(sizeof(cl_float) *imageInfo->width * imageInfo->height * imageInfo->depth));
+    BufferOwningPtr<cl_float> yOffsetValues(malloc(sizeof(cl_float) *imageInfo->width * imageInfo->height * imageInfo->depth));
+    BufferOwningPtr<cl_float> zOffsetValues(malloc(sizeof(cl_float) *imageInfo->width * imageInfo->height * imageInfo->depth));
+    
+    if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
+        if( DetectFloatToHalfRoundingMode(queue) )
+            return 1;
+        
+    BufferOwningPtr<char> imageValues;
+    generate_random_image_data( imageInfo, imageValues, d );
+    
+    // Construct testing sources
+    clProtectedImage protImage;
+    clMemWrapper unprotImage;
+    cl_mem image;
+    
+    if( gMemFlagsToUse == CL_MEM_USE_HOST_PTR )
+    {
+        // clProtectedImage uses USE_HOST_PTR, so just rely on that for the testing (via Ian)
+        // Do not use protected images for max image size test since it rounds the row size to a page size
+        if (gTestMaxImages) {
+            generate_random_image_data( imageInfo, maxImageUseHostPtrBackingStore, d );
+            unprotImage = create_image_3d( context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, imageInfo->format, 
+                                          imageInfo->width, imageInfo->height, imageInfo->depth, ( gEnablePitch ? imageInfo->rowPitch : 0 ), ( gEnablePitch ? imageInfo->slicePitch : 0 ),
+                                          maxImageUseHostPtrBackingStore, &error );
+        } else {
+            error = protImage.Create( context, (cl_mem_flags)(CL_MEM_READ_ONLY), imageInfo->format, imageInfo->width, imageInfo->height, imageInfo->depth );
+        }
+        if( error != CL_SUCCESS )
+        {
+            log_error( "ERROR: Unable to create 3D image of size %d x %d x %d (pitch %d, %d ) (%s)", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth, (int)imageInfo->rowPitch, (int)imageInfo->slicePitch, IGetErrorString( error ) );
+            return error;
+        }
+        if (gTestMaxImages)
+            image = (cl_mem)unprotImage;
+        else
+            image = (cl_mem)protImage;
+    }
+    else if( gMemFlagsToUse == CL_MEM_COPY_HOST_PTR )
+    {
+        // Don't use clEnqueueWriteImage; just use copy host ptr to get the data in
+        unprotImage = create_image_3d( context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, imageInfo->format, 
+                                      imageInfo->width, imageInfo->height, imageInfo->depth, ( gEnablePitch ? imageInfo->rowPitch : 0 ), ( gEnablePitch ? imageInfo->slicePitch : 0 ),
+                                      imageValues, &error );
+        if( error != CL_SUCCESS )
+        {
+            log_error( "ERROR: Unable to create 3D image of size %d x %d x %d (pitch %d, %d ) (%s)", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth, (int)imageInfo->rowPitch, (int)imageInfo->slicePitch, IGetErrorString( error ) );
+            return error;
+        }
+        image = unprotImage;
+    }
+    else // Either CL_MEM_ALLOC_HOST_PTR or none
+    {
+        // Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can be accessed by the host, but otherwise
+        // it works just as if no flag is specified, so we just do the same thing either way
+        unprotImage = create_image_3d( context, CL_MEM_READ_ONLY | gMemFlagsToUse, imageInfo->format, 
+                                      imageInfo->width, imageInfo->height, imageInfo->depth,
+                                      ( gEnablePitch ? imageInfo->rowPitch : 0 ), ( gEnablePitch ? imageInfo->slicePitch : 0 ),
+                                      imageValues, &error );
+        if( error != CL_SUCCESS )
+        {
+            log_error( "ERROR: Unable to create 3D image of size %d x %d x %d (pitch %d, %d ) (%s)", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth, (int)imageInfo->rowPitch, (int)imageInfo->slicePitch, IGetErrorString( error ) );
+            return error;
+        }
+        image = unprotImage;
+    }
+    
+    if( gMemFlagsToUse != CL_MEM_COPY_HOST_PTR )
+    {
+        if( gDebugTrace )
+            log_info( " - Writing image...\n" );
+        
+        size_t origin[ 3 ] = { 0, 0, 0 };
+        size_t region[ 3 ] = { imageInfo->width, imageInfo->height, imageInfo->depth };
+        
+        error = clEnqueueWriteImage(queue, image, CL_TRUE,
+                                    origin, region, gEnablePitch ? imageInfo->rowPitch : 0, gEnablePitch ? imageInfo->slicePitch : 0,
+                                    imageValues, 0, NULL, NULL);
+        if (error != CL_SUCCESS) 
+        {
+            log_error( "ERROR: Unable to write to 3D image of size %d x %d x %d\n", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth );
+            return error;
+        }
+    }
+        
+    xOffsets = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), sizeof( cl_float ) * imageInfo->width * imageInfo->height * imageInfo->depth, xOffsetValues, &error );
+    test_error( error, "Unable to create x offset buffer" );
+    yOffsets = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), sizeof( cl_float ) * imageInfo->width * imageInfo->height * imageInfo->depth, yOffsetValues, &error );
+    test_error( error, "Unable to create y offset buffer" );
+    zOffsets = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), sizeof( cl_float ) * imageInfo->width * imageInfo->height * imageInfo->depth, zOffsetValues, &error );
+    test_error( error, "Unable to create y offset buffer" );
+    results = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  get_explicit_type_size( outputType ) * 4 * imageInfo->width * imageInfo->height * imageInfo->depth, NULL, &error );
+    test_error( error, "Unable to create result buffer" );
+    
+    // Create sampler to use
+    actualSampler = clCreateSampler( context, (cl_bool)imageSampler->normalized_coords, imageSampler->addressing_mode, imageSampler->filter_mode, &error );
+    test_error( error, "Unable to create image sampler" );
+    
+    // Set arguments
+    int idx = 0;
+    error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &image );
+    test_error( error, "Unable to set kernel arguments" );
+    if( !gUseKernelSamplers )
+    {
+        error = clSetKernelArg( kernel, idx++, sizeof( cl_sampler ), &actualSampler );
+        test_error( error, "Unable to set kernel arguments" );
+    }
+    error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &xOffsets );
+    test_error( error, "Unable to set kernel arguments" );
+    error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &yOffsets );
+    test_error( error, "Unable to set kernel arguments" );
+    error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &zOffsets );
+    test_error( error, "Unable to set kernel arguments" );
+    error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &results );
+    test_error( error, "Unable to set kernel arguments" );
+    
+    const float float_offsets[] = { 0.0f, MAKE_HEX_FLOAT(0x1.0p-30f, 0x1L, -30), 0.25f, 0.3f, 0.5f - FLT_EPSILON/4.0f, 0.5f, 0.9f, 1.0f - FLT_EPSILON/2 };
+    int float_offset_count = sizeof( float_offsets) / sizeof( float_offsets[0] );
+    int numTries = MAX_TRIES, numClamped = MAX_CLAMPED;
+    int loopCount = 2 * float_offset_count;
+    if( ! useFloatCoords )
+        loopCount = 1;
+    if (gTestMaxImages) {
+        loopCount = 1;
+        log_info("Testing each size only once with pixel offsets of %g for max sized images.\n", float_offsets[0]);
+    }  
+    
+    // Get the maximum absolute error for this format
+    double formatAbsoluteError = get_max_absolute_error(imageInfo->format, imageSampler); 
+    if (gDebugTrace) log_info("\tformatAbsoluteError is %e\n", formatAbsoluteError);
+    
+    if (0 == initHalf && imageInfo->format->image_channel_data_type == CL_HALF_FLOAT ) {
+        initHalf = CL_SUCCESS == DetectFloatToHalfRoundingMode( queue );
+        if (initHalf) {
+            log_info("Half rounding mode successfully detected.\n");
+        }
+    }
+
+    for( int q = 0; q < loopCount; q++ )
+    {
+        float offset = float_offsets[ q % float_offset_count ];
+        
+        // Init the coordinates
+        InitFloatCoords( imageInfo, imageSampler, xOffsetValues, yOffsetValues, zOffsetValues,
+                        q>=float_offset_count ? -offset: offset, 
+                        q>=float_offset_count ? offset: -offset, 
+                        q>=float_offset_count ? -offset: offset, 
+                        imageSampler->normalized_coords, d );
+                                                        
+        error = clEnqueueWriteBuffer( queue, xOffsets, CL_TRUE, 0, sizeof(cl_float) * imageInfo->height * imageInfo->width * imageInfo->depth, xOffsetValues, 0, NULL, NULL );
+        test_error( error, "Unable to write x offsets" );
+        error = clEnqueueWriteBuffer( queue, yOffsets, CL_TRUE, 0, sizeof(cl_float) * imageInfo->height * imageInfo->width * imageInfo->depth, yOffsetValues, 0, NULL, NULL );
+        test_error( error, "Unable to write y offsets" );
+        error = clEnqueueWriteBuffer( queue, zOffsets, CL_TRUE, 0, sizeof(cl_float) * imageInfo->height * imageInfo->width * imageInfo->depth, zOffsetValues, 0, NULL, NULL );
+        test_error( error, "Unable to write z offsets" );
+        
+        
+        size_t resultValuesSize = imageInfo->width * imageInfo->height * imageInfo->depth * get_explicit_type_size( outputType ) * 4;
+        BufferOwningPtr<char> resultValues(malloc( resultValuesSize ));
+        memset( resultValues, 0xff, resultValuesSize );
+        clEnqueueWriteBuffer( queue, results, CL_TRUE, 0, resultValuesSize, resultValues, 0, NULL, NULL );
+        
+        // Figure out thread dimensions
+        threads[0] = (size_t)imageInfo->width;
+        threads[1] = (size_t)imageInfo->height;
+        threads[2] = (size_t)imageInfo->depth;
+        
+        // Run the kernel
+        error = clEnqueueNDRangeKernel( queue, kernel, 3, NULL, threads, NULL, 0, NULL, NULL );
+        test_error( error, "Unable to run kernel" );
+        
+        // Get results
+        error = clEnqueueReadBuffer( queue, results, CL_TRUE, 0, imageInfo->width * imageInfo->height * imageInfo->depth * get_explicit_type_size( outputType ) * 4, resultValues, 0, NULL, NULL );
+        test_error( error, "Unable to read results from kernel" );
+        if( gDebugTrace )
+            log_info( "    results read\n" );
+        
+        // Validate results element by element
+        char *imagePtr = imageValues;
+        /*
+         * FLOAT output type
+         */			
+        if( outputType == kFloat )
+        {
+            // Validate float results
+            float *resultPtr = (float *)(char *)resultValues;
+            float expected[4], error=0.0f;
+            float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 1 /*3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode ); 
+            
+            for( size_t z = 0, j = 0; z < imageInfo->depth; z++ )
+            {
+                for( size_t y = 0; y < imageInfo->height; y++ )
+                {
+                    for( size_t x = 0; x < imageInfo->width; x++, j++ )
+                    {
+                        // Step 1: go through and see if the results verify for the pixel
+                        // For the normalized case on a GPU we put in offsets to the X, Y and Z to see if we land on the
+                        // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
+                        int checkOnlyOnePixel = 0;
+                        int found_pixel = 0;
+                        float offset = NORM_OFFSET;
+                        if (!imageSampler->normalized_coords ||  imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0
+#if defined( __APPLE__ )
+                            // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes
+                            || gDeviceType != CL_DEVICE_TYPE_GPU 
+#endif
+                            ) 
+                            offset = 0.0f;          // Loop only once
+                        
+                        for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel ; norm_offset_x += NORM_OFFSET) {
+                            for (float norm_offset_y = -offset; norm_offset_y <= offset && !found_pixel ; norm_offset_y += NORM_OFFSET) {
+                                for (float norm_offset_z = -offset; norm_offset_z <= NORM_OFFSET && !found_pixel; norm_offset_z += NORM_OFFSET) {                                    
+                                    
+                                    int hasDenormals = 0;
+                                    FloatPixel maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo, 
+                                                                                          xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
+                                                                                          norm_offset_x, norm_offset_y, norm_offset_z,
+                                                                                          imageSampler, expected, 0, &hasDenormals );
+                                    
+                                    float err1 = fabsf( resultPtr[0] - expected[0] );
+                                    float err2 = fabsf( resultPtr[1] - expected[1] );
+                                    float err3 = fabsf( resultPtr[2] - expected[2] );
+                                    float err4 = fabsf( resultPtr[3] - expected[3] );
+                                    // Clamp to the minimum absolute error for the format
+                                    if (err1 > 0 && err1 < formatAbsoluteError) { err1 = 0.0f; }
+                                    if (err2 > 0 && err2 < formatAbsoluteError) { err2 = 0.0f; }
+                                    if (err3 > 0 && err3 < formatAbsoluteError) { err3 = 0.0f; }
+                                    if (err4 > 0 && err4 < formatAbsoluteError) { err4 = 0.0f; }           
+                                    float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN );
+                                    float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN );
+                                    float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN );
+                                    float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN );
+                                    
+                                    if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2)    || ! (err3 <= maxErr3) || ! (err4 <= maxErr4) )
+                                    {           
+                                        // Try flushing the denormals
+                                        if( hasDenormals )
+                                        {
+                                            // If implementation decide to flush subnormals to zero, 
+                                            // max error needs to be adjusted                                                 
+  	                                        maxErr1 += 4 * FLT_MIN;
+                                            maxErr2 += 4 * FLT_MIN;
+                                            maxErr3 += 4 * FLT_MIN;
+                                            maxErr4 += 4 * FLT_MIN;
+    
+                                            maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo, 
+                                                                                       xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
+                                                                                       norm_offset_x, norm_offset_y, norm_offset_z,
+                                                                                       imageSampler, expected, 0, NULL );
+                                            
+                                            err1 = fabsf( resultPtr[0] - expected[0] );
+                                            err2 = fabsf( resultPtr[1] - expected[1] );
+                                            err3 = fabsf( resultPtr[2] - expected[2] );
+                                            err4 = fabsf( resultPtr[3] - expected[3] );
+                                        }
+                                    }
+                                    
+                                    found_pixel = (err1 <= maxErr1) && (err2 <= maxErr2)  && (err3 <= maxErr3) && (err4 <= maxErr4);
+                                }//norm_offset_z
+                            }//norm_offset_y
+                        }//norm_offset_x
+                        
+                        // Step 2: If we did not find a match, then print out debugging info.
+                        if (!found_pixel) {
+                            // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
+                            // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
+                            checkOnlyOnePixel = 0;
+                            int shouldReturn = 0;
+                            for (float norm_offset_x = -offset; norm_offset_x <= offset && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
+                                for (float norm_offset_y = -offset; norm_offset_y <= offset && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
+                                    for (float norm_offset_z = -offset; norm_offset_z <= offset && !checkOnlyOnePixel; norm_offset_z += NORM_OFFSET) {
+                                        
+                                        int hasDenormals = 0;
+                                        FloatPixel maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo, 
+                                                                                              xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
+                                                                                              norm_offset_x, norm_offset_y, norm_offset_z,
+                                                                                              imageSampler, expected, 0, &hasDenormals );
+                                        
+                                        float err1 = fabsf( resultPtr[0] - expected[0] );
+                                        float err2 = fabsf( resultPtr[1] - expected[1] );
+                                        float err3 = fabsf( resultPtr[2] - expected[2] );
+                                        float err4 = fabsf( resultPtr[3] - expected[3] );
+                                        float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN );
+                                        float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN );
+                                        float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN );
+                                        float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN );
+                                        
+                                        
+                                        if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2)    || ! (err3 <= maxErr3) || ! (err4 <= maxErr4) )
+                                        {
+                                            // Try flushing the denormals
+                                            if( hasDenormals )
+                                            {
+  	                                            maxErr1 += 4 * FLT_MIN;
+                                                maxErr2 += 4 * FLT_MIN;
+                                                maxErr3 += 4 * FLT_MIN;
+                                                maxErr4 += 4 * FLT_MIN;
+                                            
+                                                maxPixel = sample_image_pixel_float( imageValues, imageInfo, 
+                                                                                    xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
+                                                                                    imageSampler, expected, 0, NULL );
+                                                
+                                                err1 = fabsf( resultPtr[0] - expected[0] );
+                                                err2 = fabsf( resultPtr[1] - expected[1] );
+                                                err3 = fabsf( resultPtr[2] - expected[2] );
+                                                err4 = fabsf( resultPtr[3] - expected[3] );
+                                            }
+                                        }
+                                        
+                                        if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2)    || ! (err3 <= maxErr3) || ! (err4 <= maxErr4) )
+                                        {
+                                            log_error("FAILED norm_offsets: %g , %g , %g:\n", norm_offset_x, norm_offset_y, norm_offset_z);
+                                            
+                                            float tempOut[4];
+                                            shouldReturn |= determine_validation_error_offset<float>( imagePtr, imageInfo, imageSampler, resultPtr,
+                                                                                                     expected, error, xOffsetValues[j], yOffsetValues[j], zOffsetValues[j], 
+                                                                                                     norm_offset_x, norm_offset_y, norm_offset_z, j, 
+                                                                                                     numTries, numClamped, true );
+                                            log_error( "Step by step:\n" );
+                                            FloatPixel temp = sample_image_pixel_float_offset( imageValues, imageInfo, 
+                                                                                              xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
+                                                                                              norm_offset_x, norm_offset_y, norm_offset_z,
+                                                                                              imageSampler, tempOut, 1 /*verbose*/, &hasDenormals);
+                                            log_error( "\tulps: %2.2f, %2.2f, %2.2f, %2.2f  (max allowed: %2.2f)\n\n",    
+                                                      Ulp_Error( resultPtr[0], expected[0] ),
+                                                      Ulp_Error( resultPtr[1], expected[1] ),
+                                                      Ulp_Error( resultPtr[2], expected[2] ),
+                                                      Ulp_Error( resultPtr[3], expected[3] ),
+                                                      Ulp_Error( MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) + maxErr, MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) ) );
+                                        } else {
+                                            log_error("Test error: we should have detected this passing above.\n");
+                                        }
+                                    }//norm_offset_z
+                                }//norm_offset_y
+                            }//norm_offset_x
+                            if( shouldReturn )
+                                return 1;
+                        } // if (!found_pixel)
+                        
+                        resultPtr += 4;
+                    }
+                }
+            }
+        }
+        /*
+         * UINT output type
+         */						
+        else if( outputType == kUInt )
+        {
+            // Validate unsigned integer results
+            unsigned int *resultPtr = (unsigned int *)(char *)resultValues;
+            unsigned int expected[4];
+            float error;
+            for( size_t z = 0, j = 0; z < imageInfo->depth; z++ )
+            {
+                for( size_t y = 0; y < imageInfo->height; y++ )
+                {
+                    for( size_t x = 0; x < imageInfo->width; x++, j++ )
+                    {
+                        // Step 1: go through and see if the results verify for the pixel
+                        // For the normalized case on a GPU we put in offsets to the X, Y and Z to see if we land on the
+                        // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
+                        int checkOnlyOnePixel = 0;
+                        int found_pixel = 0;
+                        for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
+                            for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
+                                for (float norm_offset_z = -NORM_OFFSET; norm_offset_z <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_z += NORM_OFFSET) {
+                                    
+                                    // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
+                                    // E.g., test one pixel.
+                                    if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
+                                        norm_offset_x = 0.0f;
+                                        norm_offset_y = 0.0f;
+                                        norm_offset_z = 0.0f;
+                                        checkOnlyOnePixel = 1;
+                                    }
+                                    
+                                    sample_image_pixel_offset<unsigned int>( imageValues, imageInfo, 
+                                                                            xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
+                                                                            norm_offset_x, norm_offset_y, norm_offset_z,
+                                                                            imageSampler, expected );
+                                    
+                                    error = errMax( errMax( abs_diff_uint(expected[ 0 ], resultPtr[ 0 ]), abs_diff_uint(expected[ 1 ], resultPtr[ 1 ]) ),
+                                                   errMax( abs_diff_uint(expected[ 2 ], resultPtr[ 2 ]), abs_diff_uint(expected[ 3 ], resultPtr[ 3 ]) ) );
+                                    
+                                    if (error < MAX_ERR)
+                                        found_pixel = 1;
+                                }//norm_offset_z
+                            }//norm_offset_y
+                        }//norm_offset_x
+                        
+                        // Step 2: If we did not find a match, then print out debugging info.
+                        if (!found_pixel) {
+                            // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
+                            // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
+                            checkOnlyOnePixel = 0;
+                            int shouldReturn = 0;
+                            for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
+                                for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
+                                    for (float norm_offset_z = -NORM_OFFSET; norm_offset_z <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_z += NORM_OFFSET) {
+                                        
+                                        // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
+                                        // E.g., test one pixel.
+                                        if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
+                                            norm_offset_x = 0.0f;
+                                            norm_offset_y = 0.0f;
+                                            norm_offset_z = 0.0f;
+                                            checkOnlyOnePixel = 1;
+                                        }
+                                        
+                                        sample_image_pixel_offset<unsigned int>( imageValues, imageInfo, 
+                                                                                xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
+                                                                                norm_offset_x, norm_offset_y, norm_offset_z,
+                                                                                imageSampler, expected );
+                                        
+                                        error = errMax( errMax( abs_diff_uint(expected[ 0 ], resultPtr[ 0 ]), abs_diff_uint(expected[ 1 ], resultPtr[ 1 ]) ),
+                                                       errMax( abs_diff_uint(expected[ 2 ], resultPtr[ 2 ]), abs_diff_uint(expected[ 3 ], resultPtr[ 3 ]) ) );
+                                        
+                                        if( error > MAX_ERR )
+                                        {
+                                            log_error("FAILED norm_offsets: %g , %g , %g:\n", norm_offset_x, norm_offset_y, norm_offset_z);
+                                            shouldReturn |=  determine_validation_error_offset<unsigned int>( imagePtr, imageInfo, imageSampler, resultPtr,
+                                                                                                             expected, error, xOffsetValues[j], yOffsetValues[j], zOffsetValues[j], 
+                                                                                                             norm_offset_x, norm_offset_y, norm_offset_z,
+                                                                                                             j, numTries, numClamped, false );
+                                        } else {
+                                            log_error("Test error: we should have detected this passing above.\n");
+                                        }
+                                    }//norm_offset_z
+                                }//norm_offset_y
+                            }//norm_offset_x
+                            if( shouldReturn )
+                                return 1;
+                        } // if (!found_pixel)
+                        
+                        resultPtr += 4;
+                    }
+                }
+            }
+        }
+        else
+        /*
+         * INT output type
+         */											
+        {
+            // Validate integer results
+            int *resultPtr = (int *)(char *)resultValues;
+            int expected[4];
+            float error;
+            for( size_t z = 0, j = 0; z < imageInfo->depth; z++ )
+            {
+                for( size_t y = 0; y < imageInfo->height; y++ )
+                {
+                    for( size_t x = 0; x < imageInfo->width; x++, j++ )
+                    {
+                        // Step 1: go through and see if the results verify for the pixel
+                        // For the normalized case on a GPU we put in offsets to the X, Y and Z to see if we land on the
+                        // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
+                        int checkOnlyOnePixel = 0;
+                        int found_pixel = 0;
+                        for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
+                            for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
+                                for (float norm_offset_z = -NORM_OFFSET; norm_offset_z <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_z += NORM_OFFSET) {
+                                    
+                                    // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
+                                    // E.g., test one pixel.
+                                    if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
+                                        norm_offset_x = 0.0f;
+                                        norm_offset_y = 0.0f;
+                                        norm_offset_z = 0.0f;
+                                        checkOnlyOnePixel = 1;
+                                    }
+                                    
+                                    sample_image_pixel_offset<int>( imageValues, imageInfo, 
+                                                                   xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
+                                                                   norm_offset_x, norm_offset_y, norm_offset_z,
+                                                                   imageSampler, expected );
+                                    
+                                    error = errMax( errMax( abs_diff_int(expected[ 0 ], resultPtr[ 0 ]), abs_diff_int(expected[ 1 ], resultPtr[ 1 ]) ),
+                                                   errMax( abs_diff_int(expected[ 2 ], resultPtr[ 2 ]), abs_diff_int(expected[ 3 ], resultPtr[ 3 ]) ) );
+                                    
+                                    if (error < MAX_ERR)
+                                        found_pixel = 1;
+                                }//norm_offset_z
+                            }//norm_offset_y
+                        }//norm_offset_x
+                        
+                        // Step 2: If we did not find a match, then print out debugging info.
+                        if (!found_pixel) {
+                            // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
+                            // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
+                            checkOnlyOnePixel = 0;
+                            int shouldReturn = 0;
+                            for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
+                                for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
+                                    for (float norm_offset_z = -NORM_OFFSET; norm_offset_z <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_z += NORM_OFFSET) {
+                                        
+                                        // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
+                                        // E.g., test one pixel.
+                                        if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0 || NORM_OFFSET == 0 || NORM_OFFSET == 0) {
+                                            norm_offset_x = 0.0f;
+                                            norm_offset_y = 0.0f;
+                                            norm_offset_z = 0.0f;
+                                            checkOnlyOnePixel = 1;
+                                        }
+                                        
+                                        sample_image_pixel_offset<int>( imageValues, imageInfo, 
+                                                                       xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
+                                                                       norm_offset_x, norm_offset_y, norm_offset_z,
+                                                                       imageSampler, expected );
+                                        
+                                        error = errMax( errMax( abs_diff_int(expected[ 0 ], resultPtr[ 0 ]), abs_diff_int(expected[ 1 ], resultPtr[ 1 ]) ),
+                                                       errMax( abs_diff_int(expected[ 2 ], resultPtr[ 2 ]), abs_diff_int(expected[ 3 ], resultPtr[ 3 ]) ) );
+                                        
+                                        if( error > MAX_ERR )
+                                        {
+                                            log_error("FAILED norm_offsets: %g , %g , %g:\n", norm_offset_x, norm_offset_y, norm_offset_z);
+                                            shouldReturn |=  determine_validation_error_offset<int>( imagePtr, imageInfo, imageSampler, resultPtr,
+                                                                                                    expected, error, xOffsetValues[j], yOffsetValues[j], zOffsetValues[j], 
+                                                                                                    norm_offset_x, norm_offset_y, norm_offset_z,
+                                                                                                    j, numTries, numClamped, false );
+                                        } else {
+                                            log_error("Test error: we should have detected this passing above.\n");
+                                        }
+                                    }//norm_offset_z
+                                }//norm_offset_y
+                            }//norm_offset_x
+                            if( shouldReturn )
+                                return 1;
+                        } // if (!found_pixel)
+                        
+                        resultPtr += 4;
+                    }
+                }
+            }
+        }
+    }
+    
+    return numTries != MAX_TRIES || numClamped != MAX_CLAMPED;
+}
+
+/*
+ * Builds the 3D read-image test kernel for the requested output type and
+ * coordinate kind, then runs test_read_image_3D over a set of image sizes.
+ * The set of sizes depends on the global test mode:
+ *   - gTestSmallImages: every width 1..12, height 1..8, depth 2..8
+ *   - gTestMaxImages:   the sizes returned by get_max_sizes
+ *   - gTestRounding:    a single size derived from the channel type's value range
+ *   - otherwise:        NUM_IMAGE_ITERATIONS random sizes that fit the memory limits
+ *
+ * device       - device whose 3D image limits and memory sizes are queried
+ * format       - image format under test
+ * imageSampler - sampler description; also used to generate in-kernel sampler
+ *                code when gUseKernelSamplers is set
+ * floatCoords  - picks the float coordinate kernel snippet instead of the int one
+ * outputType   - kInt, kUInt, or anything else (treated as float); selects the
+ *                format suffix ("i", "ui" or "f") substituted into the kernel source
+ *
+ * Returns 0 when every tested size passed, otherwise the first non-zero result
+ * of test_read_image_3D. Failures detected through test_error are handled by
+ * that macro (defined elsewhere).
+ */
+int test_read_image_set_3D( cl_device_id device, cl_image_format *format, image_sampler_data *imageSampler, 
+                           bool floatCoords, ExplicitType outputType )
+{
+    char programSrc[10240];
+    const char *ptr;
+    const char *readFormat;
+    RandomSeed seed( gRandomSeed );
+
+    int error;
+
+    clProgramWrapper program;
+    clKernelWrapper kernel;
+
+
+    // Get operating parameters
+    size_t maxWidth, maxHeight, maxDepth;
+    cl_ulong maxAllocSize, memSize;
+    image_descriptor imageInfo = { 0x0 };
+
+    imageInfo.format = format;
+    imageInfo.type = CL_MEM_OBJECT_IMAGE3D;
+
+    // The individual error codes are OR-ed together; any failure makes the result non-zero
+    error = clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL );
+    error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL );
+    error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof( maxDepth ), &maxDepth, NULL );
+    error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL );
+    error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
+    test_error( error, "Unable to get max image 3D size from device" );
+
+    // Determine types
+    if( outputType == kInt )
+        readFormat = "i";
+    else if( outputType == kUInt )
+        readFormat = "ui";
+    else // kFloat
+        readFormat = "f";
+
+    // Decide how the sampler reaches the kernel: as a kernel argument by default,
+    // or as code generated into the kernel source when gUseKernelSamplers is set
+    // (in which case the argument text is left empty).
+    const char *samplerArg = samplerKernelArg;
+    char samplerVar[ 1024 ] = "";
+    if( gUseKernelSamplers )
+    {
+        get_sampler_kernel_code( imageSampler, samplerVar );
+        samplerArg = "";
+    }
+
+    // Construct the source
+    sprintf( programSrc, read3DKernelSourcePattern, samplerArg, get_explicit_type_name( outputType ),
+            samplerVar,
+            floatCoords ? float3DUnnormalizedCoordKernelSource : int3DCoordKernelSource,
+            readFormat );
+
+    ptr = programSrc;
+    error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "sample_kernel" );
+    test_error( error, "Unable to create testing kernel" );
+
+
+    // Run tests
+    if( gTestSmallImages )
+    {
+        for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
+        {
+            imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
+
+            for( imageInfo.height = 1; imageInfo.height < 9; imageInfo.height++ )
+            {
+                imageInfo.slicePitch = imageInfo.rowPitch * imageInfo.height;
+                for( imageInfo.depth = 2; imageInfo.depth < 9; imageInfo.depth++ )
+                {
+                    if( gDebugTrace )
+                        log_info( "   at size %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.depth );
+                    int retCode = test_read_image_3D( device, context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
+                    if( retCode )
+                        return retCode;
+                }
+            }
+        }
+    }
+    else if( gTestMaxImages )
+    {
+        // Try a specific set of maximum sizes
+        size_t numberOfSizes;
+        size_t sizes[100][3];
+
+        get_max_sizes(&numberOfSizes, 100, sizes, maxWidth, maxHeight, maxDepth, 1, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE3D, imageInfo.format);
+
+        for( size_t idx = 0; idx < numberOfSizes; idx++ )
+        {
+            imageInfo.width = sizes[ idx ][ 0 ];
+            imageInfo.height = sizes[ idx ][ 1 ];
+            imageInfo.depth = sizes[ idx ][ 2 ];
+            imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
+            imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
+            log_info("Testing %d x %d x %d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ]);
+            if( gDebugTrace )
+                log_info( "   at max size %d,%d,%d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ] );
+            int retCode = test_read_image_3D( device, context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
+            if( retCode )
+                return retCode;
+        }
+    }
+    else if( gTestRounding )
+    {
+        // Compute the value range in 64 bits. Shifting a plain int "1" by 32
+        // (4-byte channel types) is undefined behavior and can produce a bogus
+        // range, which in turn gives a zero height and a division by zero below.
+        cl_ulong typeRange = (cl_ulong)1 << ( get_format_type_size( imageInfo.format ) * 8 );
+        imageInfo.height = (size_t)( typeRange / 256 );
+        imageInfo.width = (size_t)( typeRange / (cl_ulong)imageInfo.height );
+        imageInfo.depth = 2;
+
+        imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
+        imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
+        int retCode = test_read_image_3D( device, context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
+        if( retCode )
+            return retCode;
+    }
+    else
+    {
+        for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
+        {
+            cl_ulong size;
+            // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
+            // image, the result array, plus offset arrays, will fit in the global ram space
+            do
+            {
+                imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, seed );
+                imageInfo.height = (size_t)random_log_in_range( 16, (int)maxHeight / 32, seed );
+                imageInfo.depth = (size_t)random_log_in_range( 16, (int)maxDepth / 32, seed );
+
+                imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
+                imageInfo.slicePitch = imageInfo.rowPitch * imageInfo.height;
+
+                if( gEnablePitch )
+                {
+                    // Pad each row and each slice by a random number of extra pixels / rows
+                    size_t extraWidth = (int)random_log_in_range( 0, 64, seed );
+                    imageInfo.rowPitch += extraWidth * get_pixel_size( imageInfo.format );
+
+                    size_t extraHeight = (int)random_log_in_range( 0, 64, seed );
+                    imageInfo.slicePitch = imageInfo.rowPitch * (imageInfo.height + extraHeight);
+                }
+
+                size = (cl_ulong)imageInfo.slicePitch * (cl_ulong)imageInfo.depth * 4 * 4;
+            } while(  size > maxAllocSize || ( size * 3 ) > memSize );
+
+            if( gDebugTrace )
+                log_info( "   at size %d,%d,%d (pitch %d,%d) out of %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.depth, (int)imageInfo.rowPitch, (int)imageInfo.slicePitch, (int)maxWidth, (int)maxHeight, (int)maxDepth );
+            int retCode = test_read_image_3D( device, context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
+            if( retCode )
+                return retCode;
+        }
+    }
+
+    return 0;
+}
+
+
+
+
--- a/test_conformance/images/kernel_read_write/test_write_1D.cpp
+++ b/test_conformance/images/kernel_read_write/test_write_1D.cpp
@@ -0,0 +1,503 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../testBase.h"
+
+#if !defined(_WIN32)
+#include <sys/mman.h>
+#endif
+
+// NOTE(review): presumably the maximum error tolerated when validating written
+// pixels; its use sites are not visible in this part of the file - confirm.
+#define MAX_ERR 0.005f
+
+// Command queue and context declared here and defined elsewhere.
+extern cl_command_queue queue;
+extern cl_context context;
+// Global test-configuration switches and settings, declared here and defined elsewhere.
+extern bool			gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestRounding;
+extern cl_filter_mode	gFilterModeToSkip;
+extern cl_mem_flags gMemFlagsToUse;
+
+
+// printf-style template for the 1D image write kernel. It has two %s
+// placeholders: the element type of the 4-component input vector ("%s4") and
+// the suffix appended to "write_image". Each work-item reads input[ id ] and
+// writes it to the output image at coordinate id, where id = get_global_id(0).
+// NOTE(review): the call that fills in the placeholders is outside this excerpt.
+const char *write1DKernelSourcePattern = 
+"__kernel void sample_kernel( __global %s4 *input, write_only image1d_t output )\n"
+"{\n"
+"   int tidX = get_global_id(0);\n"
+"   int offset = tidX;\n"
+"   write_image%s( output, tidX, input[ offset ] );\n"
+"}";
+
+int test_write_image_1D( cl_device_id device, cl_context context, cl_command_queue queue, cl_kernel kernel, 
+                     image_descriptor *imageInfo, ExplicitType inputType, MTdata d )
+{
+    int                 totalErrors = 0;
+    const cl_mem_flags  mem_flag_types[2] = {  CL_MEM_WRITE_ONLY,   CL_MEM_READ_WRITE };
+    const char *        mem_flag_names[2] = { "CL_MEM_WRITE_ONLY", "CL_MEM_READ_WRITE" };
+    
+    for( size_t mem_flag_index = 0; mem_flag_index < sizeof( mem_flag_types ) / sizeof( mem_flag_types[0] ); mem_flag_index++ )
+    {
+        int error;
+        size_t threads[2];
+        bool verifyRounding = false;
+        int totalErrors = 0;
+        int forceCorrectlyRoundedWrites = 0;
+        
+#if defined( __APPLE__ )
+        // Require Apple's CPU implementation to be correctly rounded, not just within 0.6
+        cl_device_type type = 0;
+        if( (error = clGetDeviceInfo( device, CL_DEVICE_TYPE, sizeof( type), &type, NULL )))
+        {
+            log_error("Error: Could not get device type for Apple device! (%d) \n", error );
+            return 1;
+        }
+        if( type == CL_DEVICE_TYPE_CPU )
+            forceCorrectlyRoundedWrites = 1;
+#endif
+        
+        if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
+            if( DetectFloatToHalfRoundingMode(queue) )
+                return 1;
+        
+        clMemWrapper inputStream;
+        BufferOwningPtr<char> maxImageUseHostPtrBackingStore, imageValues;
+        
+        create_random_image_data( inputType, imageInfo, imageValues, d );
+        
+        if( inputType == kFloat && imageInfo->format->image_channel_data_type != CL_FLOAT && imageInfo->format->image_channel_data_type != CL_HALF_FLOAT )
+        {
+            // First, fill with arbitrary floats
+            {
+                float *inputValues = (float *)(char*)imageValues;
+                for( size_t i = 0; i < imageInfo->width * 4; i++ )
+                    inputValues[ i ] = get_random_float( -0.1f, 1.1f, d );
+            }
+            
+            // Throw a few extra test values in there
+            float *inputValues = (float *)(char*)imageValues;
+            size_t i = 0;
+            inputValues[ i++ ] = -0.0000000000009f;
+            inputValues[ i++ ] = 1.f;		
+            inputValues[ i++ ] = -1.f;
+            inputValues[ i++ ] = 2.f;		
+            
+            // Also fill in the first few vectors with some deliberate tests to determine the rounding mode
+            // is correct
+            if( imageInfo->width > 12 )
+            {
+                float formatMax = (float)get_format_max_int( imageInfo->format );
+                inputValues[ i++ ] = 4.0f / formatMax;
+                inputValues[ i++ ] = 4.3f / formatMax;
+                inputValues[ i++ ] = 4.5f / formatMax;
+                inputValues[ i++ ] = 4.7f / formatMax;
+                inputValues[ i++ ] = 5.0f / formatMax;
+                inputValues[ i++ ] = 5.3f / formatMax;
+                inputValues[ i++ ] = 5.5f / formatMax;
+                inputValues[ i++ ] = 5.7f / formatMax;
+                verifyRounding = true;
+            }
+        }
+        else if( inputType == kUInt )
+        {
+            unsigned int *inputValues = (unsigned int*)(char*)imageValues;
+            size_t i = 0;
+            inputValues[ i++ ] = 0;
+            inputValues[ i++ ] = 65535;
+            inputValues[ i++ ] = 7271820;
+            inputValues[ i++ ] = 0;
+        }
+        
+        // Construct testing sources
+        clProtectedImage protImage;
+        clMemWrapper unprotImage;
+        cl_mem image;
+        
+        if( gMemFlagsToUse == CL_MEM_USE_HOST_PTR )
+        {
+            // clProtectedImage uses USE_HOST_PTR, so just rely on that for the testing (via Ian)
+            // Do not use protected images for max image size test since it rounds the row size to a page size
+            if (gTestMaxImages) {
+                create_random_image_data( inputType, imageInfo, maxImageUseHostPtrBackingStore, d );
+                
+                unprotImage = create_image_1d( context, mem_flag_types[mem_flag_index] | CL_MEM_USE_HOST_PTR, imageInfo->format, 
+                                              imageInfo->width, 0,
+                                              maxImageUseHostPtrBackingStore, NULL, &error );    
+            } else {
+                error = protImage.Create( context, mem_flag_types[mem_flag_index], imageInfo->format, imageInfo->width );
+            }
+            if( error != CL_SUCCESS )
+            {
+                log_error( "ERROR: Unable to create 1D image of size %ld pitch %ld (%s, %s)\n", imageInfo->width, 
+                          imageInfo->rowPitch, IGetErrorString( error ), mem_flag_names[mem_flag_index] );
+                return error;
+            }
+            
+            if (gTestMaxImages)
+                image = (cl_mem)unprotImage;
+            else
+                image = (cl_mem)protImage;
+        }
+        else // Either CL_MEM_ALLOC_HOST_PTR, CL_MEM_COPY_HOST_PTR or none
+        {
+            // Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can be accessed by the host, but otherwise
+            // it works just as if no flag is specified, so we just do the same thing either way
+            // Note: if the flags is really CL_MEM_COPY_HOST_PTR, we want to remove it, because we don't want to copy any incoming data
+            unprotImage = create_image_1d( context, mem_flag_types[mem_flag_index] | ( gMemFlagsToUse & ~(CL_MEM_COPY_HOST_PTR) ), imageInfo->format, 
+                                          imageInfo->width, 0,
+                                          imageValues, NULL, &error );
+            if( error != CL_SUCCESS )
+            {
+                log_error( "ERROR: Unable to create 2D image of size %ld x %ld pitch %ld (%s, %s)\n", imageInfo->width, 
+                          imageInfo->rowPitch, IGetErrorString( error ), mem_flag_names[mem_flag_index] );
+                return error;
+            }
+            image = unprotImage;
+        }
+        
+        inputStream = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), 
+                                     get_explicit_type_size( inputType ) * 4 * imageInfo->width, imageValues, &error );
+        test_error( error, "Unable to create input buffer" );
+        
+        // Set arguments
+        error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &inputStream );
+        test_error( error, "Unable to set kernel arguments" );
+        error = clSetKernelArg( kernel, 1, sizeof( cl_mem ), &image );
+        test_error( error, "Unable to set kernel arguments" );
+        
+        // Run the kernel
+        threads[0] = (size_t)imageInfo->width;
+        error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
+        test_error( error, "Unable to run kernel" );
+        
+        // Get results
+        size_t resultSize = imageInfo->rowPitch;
+        clProtectedArray PA(resultSize);
+        char *resultValues = (char *)((void *)PA);
+        
+        if( gDebugTrace )
+            log_info( "    reading results, %ld kbytes\n", (unsigned long)( resultSize / 1024 ) );
+        
+        size_t origin[ 3 ] = { 0, 0, 0 };
+        size_t region[ 3 ] = { imageInfo->width, 1, 1 };
+        
+        error = clEnqueueReadImage( queue, image, CL_TRUE, origin, region, gEnablePitch ? imageInfo->rowPitch : 0, 0, resultValues, 0, NULL, NULL );
+        test_error( error, "Unable to read results from kernel" );
+        if( gDebugTrace )
+            log_info( "    results read\n" );
+        
+        // Validate results element by element
+        char *imagePtr = imageValues;
+        int numTries = 5;
+        {
+            char *resultPtr = (char *)resultValues;
+            for( size_t x = 0, i = 0; x < imageInfo->width; x++, i++ )
+            {
+                char resultBuffer[ 16 ]; // Largest format would be 4 channels * 4 bytes (32 bits) each
+                
+                // Convert this pixel
+                if( inputType == kFloat )
+                    pack_image_pixel( (float *)imagePtr, imageInfo->format, resultBuffer );
+                else if( inputType == kInt )
+                    pack_image_pixel( (int *)imagePtr, imageInfo->format, resultBuffer );
+                else // if( inputType == kUInt )
+                    pack_image_pixel( (unsigned int *)imagePtr, imageInfo->format, resultBuffer );
+                
+                // Compare against the results
+                if( imageInfo->format->image_channel_data_type == CL_FLOAT )
+                {
+                    // Compare floats
+                    float *expected = (float *)resultBuffer;
+                    float *actual = (float *)resultPtr;
+                    float err = 0.f;
+                    for( unsigned int j = 0; j < get_format_channel_count( imageInfo->format ); j++ )
+                        err += ( expected[ j ] != 0 ) ? fabsf( ( expected[ j ] - actual[ j ] ) / expected[ j ] ) : fabsf( expected[ j ] - actual[ j ] );
+                    
+                    err /= (float)get_format_channel_count( imageInfo->format );
+                    if( err > MAX_ERR )
+                    {
+                        unsigned int *e = (unsigned int *)expected;
+                        unsigned int *a = (unsigned int *)actual;
+                        log_error( "ERROR: Sample %ld (%ld) did not validate! (%s)\n", i, x, mem_flag_names[mem_flag_index] );
+                        log_error( "       Error: %g\n", err );
+                        log_error( "       Expected: %a %a %a %a\n", expected[ 0 ], expected[ 1 ], expected[ 2 ], expected[ 3 ] );
+                        log_error( "       Expected: %08x %08x %08x %08x\n", e[ 0 ], e[ 1 ], e[ 2 ], e[ 3 ] );
+                        log_error( "       Actual:   %a %a %a %a\n", actual[ 0 ], actual[ 1 ], actual[ 2 ], actual[ 3 ] );
+                        log_error( "       Actual:   %08x %08x %08x %08x\n", a[ 0 ], a[ 1 ], a[ 2 ], a[ 3 ] );
+                        totalErrors++;
+                        if( ( --numTries ) == 0 )
+                            return 1;
+                    }
+                }
+                else if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
+                {                
+                    // Compare half floats
+                    if( memcmp( resultBuffer, resultPtr, 2 * get_format_channel_count( imageInfo->format ) ) != 0 )
+                    {
+                        cl_ushort *e = (cl_ushort *)resultBuffer;
+                        cl_ushort *a = (cl_ushort *)resultPtr;
+                        int err_cnt = 0;
+
+                        //Fix up cases where we have NaNs
+                        for( size_t j = 0; j < get_format_channel_count( imageInfo->format ); j++ )
+                        {
+                            if( is_half_nan( e[j] ) && is_half_nan(a[j]) )
+                                continue;
+                            if( e[j] != a[j] )
+                                err_cnt++;
+                        }
+
+                        if( err_cnt )
+                        {
+                            totalErrors++;
+                            log_error( "ERROR: Sample %ld (%ld) did not validate! (%s)\n", i, x, mem_flag_names[mem_flag_index] );
+                            log_error( "    Expected: 0x%04x 0x%04x 0x%04x 0x%04x\n", e[0], e[1], e[2], e[3] );
+                            log_error( "    Actual:   0x%04x 0x%04x 0x%04x 0x%04x\n", a[0], a[1], a[2], a[3] );
+                            if( inputType == kFloat )
+                            {
+                                float *p = (float *)(char *)imagePtr;
+                                log_error( "    Source: %a %a %a %a\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] );
+                                log_error( "          : %12.24f %12.24f %12.24f %12.24f\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] );
+                            }
+                            if( ( --numTries ) == 0 )
+                                return 1;
+                        }
+                    }
+                }
+                else
+                {
+                    // Exact result passes every time
+                    if( memcmp( resultBuffer, resultPtr, get_pixel_size( imageInfo->format ) ) != 0 )
+                    { 
+                        // result is inexact.  Calculate error
+                        int failure = 1;
+                        float errors[4] = {NAN, NAN, NAN, NAN};
+                        pack_image_pixel_error( (float *)imagePtr, imageInfo->format, resultBuffer, errors );
+                        
+                        // We are allowed 0.6 absolute error vs. infinitely precise for some normalized formats
+                        if( 0 == forceCorrectlyRoundedWrites    &&
+                           (
+                            imageInfo->format->image_channel_data_type == CL_UNORM_INT8 ||
+                            imageInfo->format->image_channel_data_type == CL_UNORM_INT_101010 ||
+                            imageInfo->format->image_channel_data_type == CL_UNORM_INT16 ||
+                            imageInfo->format->image_channel_data_type == CL_SNORM_INT8 ||
+                            imageInfo->format->image_channel_data_type == CL_SNORM_INT16 
+                            ))
+                        {
+                            if( ! (fabsf( errors[0] ) > 0.6f) && ! (fabsf( errors[1] ) > 0.6f) &&
+                               ! (fabsf( errors[2] ) > 0.6f) && ! (fabsf( errors[3] ) > 0.6f)  )
+                                failure = 0;
+                        }
+                        
+                        
+                        if( failure )
+                        {
+                            totalErrors++;
+                            // Is it our special rounding test?
+                            if( verifyRounding && i >= 1 && i <= 2 )
+                            {
+                                // Try to guess what the rounding mode of the device really is based on what it returned
+                                const char *deviceRounding = "unknown";
+                                unsigned int deviceResults[8];
+                                read_image_pixel<unsigned int>( resultPtr, imageInfo, 0, 0, 0, deviceResults );
+                                read_image_pixel<unsigned int>( resultPtr, imageInfo, 1, 0, 0, &deviceResults[ 4 ] );
+                                
+                                if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 4 && deviceResults[ 3 ] == 4 &&
+                                   deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 5 && deviceResults[ 7 ] == 5 )
+                                    deviceRounding = "truncate";
+                                else if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 5 && deviceResults[ 3 ] == 5 &&
+                                        deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 6 && deviceResults[ 7 ] == 6 )
+                                    deviceRounding = "round to nearest";
+                                else if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 4 && deviceResults[ 3 ] == 5 &&
+                                        deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 6 && deviceResults[ 7 ] == 6 )
+                                    deviceRounding = "round to even";
+                                
+                                log_error( "ERROR: Rounding mode sample (%ld) did not validate, probably due to the device's rounding mode being wrong (%s)\n", i, mem_flag_names[mem_flag_index] );
+                                log_error( "       Actual values rounded by device: %x %x %x %x %x %x %x %x\n", deviceResults[ 0 ], deviceResults[ 1 ], deviceResults[ 2 ], deviceResults[ 3 ], 
+                                          deviceResults[ 4 ], deviceResults[ 5 ], deviceResults[ 6 ], deviceResults[ 7 ] );	
+                                log_error( "       Rounding mode of device appears to be %s\n", deviceRounding );
+                                return 1;
+                            }
+                            log_error( "ERROR: Sample %d (%d) did not validate!\n", (int)i, (int)x );
+                            switch(imageInfo->format->image_channel_data_type)
+                            {
+                                case CL_UNORM_INT8:
+                                case CL_SNORM_INT8:
+                                case CL_UNSIGNED_INT8:
+                                case CL_SIGNED_INT8:
+                                    log_error( "    Expected: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultBuffer)[0], ((cl_uchar*)resultBuffer)[1], ((cl_uchar*)resultBuffer)[2], ((cl_uchar*)resultBuffer)[3] );
+                                    log_error( "    Actual:   0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultPtr)[0], ((cl_uchar*)resultPtr)[1], ((cl_uchar*)resultPtr)[2], ((cl_uchar*)resultPtr)[3] );
+                                    log_error( "    Error:    %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
+                                    break;
+                                case CL_UNORM_INT16:
+                                case CL_SNORM_INT16:
+                                case CL_UNSIGNED_INT16:
+                                case CL_SIGNED_INT16:
+#ifdef CL_SFIXED14_APPLE
+                                case CL_SFIXED14_APPLE:
+#endif
+                                    log_error( "    Expected: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultBuffer)[0], ((cl_ushort*)resultBuffer)[1], ((cl_ushort*)resultBuffer)[2], ((cl_ushort*)resultBuffer)[3] );
+                                    log_error( "    Actual:   0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultPtr)[0], ((cl_ushort*)resultPtr)[1], ((cl_ushort*)resultPtr)[2], ((cl_ushort*)resultPtr)[3] );
+                                    log_error( "    Error:    %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
+                                    break;
+                                case CL_HALF_FLOAT:
+                                    log_error( "    Expected: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultBuffer)[0], ((cl_ushort*)resultBuffer)[1], ((cl_ushort*)resultBuffer)[2], ((cl_ushort*)resultBuffer)[3] );
+                                    log_error( "    Actual:   0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultPtr)[0], ((cl_ushort*)resultPtr)[1], ((cl_ushort*)resultPtr)[2], ((cl_ushort*)resultPtr)[3] );
+                                    log_error( "    Ulps:     %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
+                                    break;
+                                case CL_UNSIGNED_INT32:
+                                case CL_SIGNED_INT32:
+                                    log_error( "    Expected: 0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n", ((cl_uint*)resultBuffer)[0], ((cl_uint*)resultBuffer)[1], ((cl_uint*)resultBuffer)[2], ((cl_uint*)resultBuffer)[3] );
+                                    log_error( "    Actual:   0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n", ((cl_uint*)resultPtr)[0], ((cl_uint*)resultPtr)[1], ((cl_uint*)resultPtr)[2], ((cl_uint*)resultPtr)[3] );
+                                    break;
+                                case CL_FLOAT:
+                                    log_error( "    Expected: %a %a %a %a\n", ((cl_float*)resultBuffer)[0], ((cl_float*)resultBuffer)[1], ((cl_float*)resultBuffer)[2], ((cl_float*)resultBuffer)[3] );
+                                    log_error( "    Actual:   %a %a %a %a\n", ((cl_float*)resultPtr)[0], ((cl_float*)resultPtr)[1], ((cl_float*)resultPtr)[2], ((cl_float*)resultPtr)[3] );
+                                    log_error( "    Ulps:     %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
+                                    break;
+                            }
+                            
+                            float *v = (float *)(char *)imagePtr;
+                            log_error( "   src: %g %g %g %g\n", v[ 0 ], v[ 1], v[ 2 ], v[ 3 ] );
+                            log_error( "      : %a %a %a %a\n", v[ 0 ], v[ 1], v[ 2 ], v[ 3 ] );
+                            log_error( "   src: %12.24f %12.24f %12.24f %12.24f\n", v[0 ], v[  1], v[ 2 ], v[ 3 ] );
+                            
+                            if( ( --numTries ) == 0 )
+                                return 1;
+                        }
+                    }
+                }
+                imagePtr += get_explicit_type_size( inputType ) * 4;
+                resultPtr += get_pixel_size( imageInfo->format );
+            }
+        }
+    }
+    
+    // All done!
+    return totalErrors;
+}
+
+
+// Builds the 1D write_image test kernel for the given input type and runs
+// test_write_image_1D() over a set of image widths selected by the global
+// test-mode flags:
+//   gTestSmallImages - every width from 1 through 12
+//   gTestMaxImages   - the widths returned by get_max_sizes()
+//   gTestRounding    - a single width derived from the channel type's range
+//   otherwise        - NUM_IMAGE_ITERATIONS randomly chosen widths
+//
+//   device    - device queried for image and memory limits
+//   format    - image format under test
+//   inputType - type of the values fed to the kernel (kInt, kUInt or kFloat)
+//   d         - random number generator state
+//
+// Returns 0 when every run passes, otherwise the first non-zero value
+// returned by test_write_image_1D().
+// NOTE(review): test_error() presumably logs and returns the error code on
+// failure -- confirm against the harness macro.
+int test_write_image_1D_set( cl_device_id device, cl_image_format *format, ExplicitType inputType, MTdata d )
+{
+    char programSrc[10240];
+    const char *ptr;
+    const char *readFormat;
+    clProgramWrapper program;
+    clKernelWrapper kernel;
+
+    int error;
+
+    // Get our operating parameters
+    size_t maxWidth;
+    cl_ulong maxAllocSize, memSize;
+    size_t pixelSize;
+
+    image_descriptor imageInfo = { 0x0 };
+
+    imageInfo.format = format;
+    imageInfo.slicePitch = imageInfo.arraySize = 0;
+    imageInfo.height = imageInfo.depth = 1;
+    imageInfo.type = CL_MEM_OBJECT_IMAGE1D;
+    pixelSize = get_pixel_size( imageInfo.format );
+
+    // The 2D max-width query is used as the width limit for the 1D images tested here
+    error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL );
+    error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL );
+    error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
+    test_error( error, "Unable to get max image 2D size from device" );
+
+    // Determine types: suffix appended to write_image in the kernel source
+    if( inputType == kInt )
+        readFormat = "i";
+    else if( inputType == kUInt )
+        readFormat = "ui";
+    else // kFloat
+        readFormat = "f";
+
+    // Construct the source
+    sprintf( programSrc, write1DKernelSourcePattern, get_explicit_type_name( inputType ), readFormat );
+
+    ptr = programSrc;
+    error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "sample_kernel" );
+    test_error( error, "Unable to create testing kernel" );
+
+    // Run tests
+    if( gTestSmallImages )
+    {
+        for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
+        {
+            imageInfo.rowPitch = imageInfo.width * pixelSize;
+            if( gDebugTrace )
+                log_info( "   at size %d\n", (int)imageInfo.width );
+            int retCode = test_write_image_1D( device, context, queue, kernel, &imageInfo, inputType, d );
+            if( retCode )
+                return retCode;
+        }
+    }
+    else if( gTestMaxImages )
+    {
+        // Try a specific set of maximum sizes
+        size_t numberOfSizes;
+        size_t sizes[100][3];
+
+        get_max_sizes(&numberOfSizes, 100, sizes, maxWidth, 1, 1, 1, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE1D, imageInfo.format);
+
+        for( size_t idx = 0; idx < numberOfSizes; idx++ )
+        {
+            imageInfo.width = sizes[ idx ][ 0 ];
+            imageInfo.rowPitch = imageInfo.width * pixelSize;
+            log_info("Testing %d\n", (int)imageInfo.width);
+            int retCode = test_write_image_1D( device, context, queue, kernel, &imageInfo, inputType, d );
+            if( retCode )
+                return retCode;
+        }
+    }
+    else if( gTestRounding )
+    {
+        // Do the shift in 64 bits: a 4-byte channel type needs a shift by 32,
+        // which is undefined behavior when performed on a plain int.
+        cl_ulong typeRange = (cl_ulong)1 << ( get_format_type_size( imageInfo.format ) * 8 );
+        cl_ulong roundingWidth = typeRange / 256;
+
+        // Never request an image wider than the device limit queried above
+        if( roundingWidth > (cl_ulong)maxWidth )
+            roundingWidth = (cl_ulong)maxWidth;
+        imageInfo.width = (size_t)roundingWidth;
+
+        imageInfo.rowPitch = imageInfo.width * pixelSize;
+        int retCode = test_write_image_1D( device, context, queue, kernel, &imageInfo, inputType, d );
+        if( retCode )
+            return retCode;
+    }
+    else
+    {
+        for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
+        {
+            cl_ulong size;
+            // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
+            // image, the result array, plus offset arrays, will fit in the global ram space
+            do
+            {
+                imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, d );
+
+                imageInfo.rowPitch = imageInfo.width * pixelSize;
+                if( gEnablePitch )
+                {
+                    // Pad the row with a random number of extra pixels
+                    size_t extraWidth = (int)random_log_in_range( 0, 64, d );
+                    imageInfo.rowPitch += extraWidth * pixelSize;
+                }
+
+                size = (size_t)imageInfo.rowPitch * 4;
+            } while(  size > maxAllocSize || ( size * 3 ) > memSize );
+
+            if( gDebugTrace )
+                log_info( "   at size %d (pitch %d) out of %d\n", (int)imageInfo.width, (int)imageInfo.rowPitch, (int)maxWidth );
+
+            int retCode = test_write_image_1D( device, context, queue, kernel, &imageInfo, inputType, d );
+            if( retCode )
+                return retCode;
+        }
+    }
+
+    return 0;
+}
--- a/test_conformance/images/kernel_read_write/test_write_1D_array.cpp
+++ b/test_conformance/images/kernel_read_write/test_write_1D_array.cpp
@@ -0,0 +1,522 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../testBase.h"
+
+#if !defined(_WIN32)
+#include <sys/mman.h>
+#endif
+
+#define MAX_ERR 0.005f
+
+extern cl_command_queue queue;
+extern cl_context context;
+extern bool			gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestRounding;
+extern cl_filter_mode	gFilterModeToSkip;
+extern cl_mem_flags gMemFlagsToUse;
+
+
+// OpenCL C source template for the 1D-image-array write test kernel.
+// The text contains two %s placeholders: the first forms the element type of
+// the 4-component input vector ("%s4"), the second is the suffix appended to
+// "write_image". Each work-item (tidX, tidY) writes
+// input[ tidY * image width + tidX ] to coordinate (tidX, tidY) of the output.
+// NOTE(review): presumably expanded with sprintf using the explicit type name
+// and "f"/"i"/"ui" -- confirm against the caller.
+const char *write1DArrayKernelSourcePattern = 
+"__kernel void sample_kernel( __global %s4 *input, write_only image1d_array_t output )\n"
+"{\n"
+"   int tidX = get_global_id(0), tidY = get_global_id(1);\n"
+"   int offset = tidY*get_image_width(output) + tidX;\n"
+"   write_image%s( output, (int2)( tidX, tidY ), input[ offset ] );\n"
+"}";
+
+int test_write_image_1D_array( cl_device_id device, cl_context context, cl_command_queue queue, cl_kernel kernel, 
+                     image_descriptor *imageInfo, ExplicitType inputType, MTdata d )
+{
+    int                 totalErrors = 0;
+    const cl_mem_flags  mem_flag_types[2] = {  CL_MEM_WRITE_ONLY,   CL_MEM_READ_WRITE };
+    const char *        mem_flag_names[2] = { "CL_MEM_WRITE_ONLY", "CL_MEM_READ_WRITE" };
+  
+    size_t pixelSize = get_pixel_size( imageInfo->format );
+    
+    for( size_t mem_flag_index = 0; mem_flag_index < sizeof( mem_flag_types ) / sizeof( mem_flag_types[0] ); mem_flag_index++ )
+    {
+        int error;
+        size_t threads[2];
+        bool verifyRounding = false;
+        int totalErrors = 0;
+        int forceCorrectlyRoundedWrites = 0;
+        
+#if defined( __APPLE__ )
+        // Require Apple's CPU implementation to be correctly rounded, not just within 0.6
+        cl_device_type type = 0;
+        if( (error = clGetDeviceInfo( device, CL_DEVICE_TYPE, sizeof( type), &type, NULL )))
+        {
+            log_error("Error: Could not get device type for Apple device! (%d) \n", error );
+            return 1;
+        }
+        if( type == CL_DEVICE_TYPE_CPU )
+            forceCorrectlyRoundedWrites = 1;
+#endif
+        
+        if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
+            if( DetectFloatToHalfRoundingMode(queue) )
+                return 1;
+        
+        clMemWrapper inputStream;
+        BufferOwningPtr<char> maxImageUseHostPtrBackingStore, imageValues;
+        
+        create_random_image_data( inputType, imageInfo, imageValues, d );
+        
+        if( inputType == kFloat && imageInfo->format->image_channel_data_type != CL_FLOAT && imageInfo->format->image_channel_data_type != CL_HALF_FLOAT )
+        {
+            // First, fill with arbitrary floats
+            for( size_t y = 0; y < imageInfo->arraySize; y++ )
+            {
+                float *inputValues = (float *)(char*)imageValues + y * imageInfo->width * 4;
+                for( size_t i = 0; i < imageInfo->width * 4; i++ )
+                    inputValues[ i ] = get_random_float( -0.1f, 1.1f, d );
+            }
+            
+            // Throw a few extra test values in there
+            float *inputValues = (float *)(char*)imageValues;
+            size_t i = 0;
+            inputValues[ i++ ] = -0.0000000000009f;
+            inputValues[ i++ ] = 1.f;		
+            inputValues[ i++ ] = -1.f;
+            inputValues[ i++ ] = 2.f;		
+            
+            // Also fill in the first few vectors with some deliberate tests to determine the rounding mode
+            // is correct
+            if( imageInfo->width > 12 )
+            {
+                float formatMax = (float)get_format_max_int( imageInfo->format );
+                inputValues[ i++ ] = 4.0f / formatMax;
+                inputValues[ i++ ] = 4.3f / formatMax;
+                inputValues[ i++ ] = 4.5f / formatMax;
+                inputValues[ i++ ] = 4.7f / formatMax;
+                inputValues[ i++ ] = 5.0f / formatMax;
+                inputValues[ i++ ] = 5.3f / formatMax;
+                inputValues[ i++ ] = 5.5f / formatMax;
+                inputValues[ i++ ] = 5.7f / formatMax;
+                verifyRounding = true;
+            }
+        }
+        else if( inputType == kUInt )
+        {
+            unsigned int *inputValues = (unsigned int*)(char*)imageValues;
+            size_t i = 0;
+            inputValues[ i++ ] = 0;
+            inputValues[ i++ ] = 65535;
+            inputValues[ i++ ] = 7271820;
+            inputValues[ i++ ] = 0;
+        }
+        
+        // Construct testing sources
+        clProtectedImage protImage;
+        clMemWrapper unprotImage;
+        cl_mem image;
+        
+        if( gMemFlagsToUse == CL_MEM_USE_HOST_PTR )
+        {
+            // clProtectedImage uses USE_HOST_PTR, so just rely on that for the testing (via Ian)
+            // Do not use protected images for max image size test since it rounds the row size to a page size
+            if (gTestMaxImages) {
+                create_random_image_data( inputType, imageInfo, maxImageUseHostPtrBackingStore, d );
+                
+                unprotImage = create_image_1d_array( context, mem_flag_types[mem_flag_index] | CL_MEM_USE_HOST_PTR, imageInfo->format, 
+                                              imageInfo->width, imageInfo->arraySize, 0, 0, 
+                                              maxImageUseHostPtrBackingStore, &error );    
+            } else {
+                error = protImage.Create( context, (cl_mem_object_type)CL_MEM_OBJECT_IMAGE1D_ARRAY, mem_flag_types[mem_flag_index], imageInfo->format, imageInfo->width, 1, 1, imageInfo->arraySize );
+            }
+            if( error != CL_SUCCESS )
+            {
+                log_error( "ERROR: Unable to create 1D image array of size %ld x %ld pitch %ld (%s, %s)\n", imageInfo->width, imageInfo->arraySize, 
+                          imageInfo->rowPitch, IGetErrorString( error ), mem_flag_names[mem_flag_index] );
+                return error;
+            }
+            
+            if (gTestMaxImages)
+                image = (cl_mem)unprotImage;
+            else
+                image = (cl_mem)protImage;
+        }
+        else // Either CL_MEM_ALLOC_HOST_PTR, CL_MEM_COPY_HOST_PTR or none
+        {
+            // Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can be accessed by the host, but otherwise
+            // it works just as if no flag is specified, so we just do the same thing either way
+            // Note: if the flags is really CL_MEM_COPY_HOST_PTR, we want to remove it, because we don't want to copy any incoming data
+            unprotImage = create_image_1d_array( context, mem_flag_types[mem_flag_index] | ( gMemFlagsToUse & ~(CL_MEM_COPY_HOST_PTR) ), imageInfo->format, 
+                                          imageInfo->width, imageInfo->arraySize, 0, 0, 
+                                          imageValues, &error );
+            if( error != CL_SUCCESS )
+            {
+                log_error( "ERROR: Unable to create 1D image array of size %ld x %ld pitch %ld (%s, %s)\n", imageInfo->width, imageInfo->arraySize, 
+                          imageInfo->rowPitch, IGetErrorString( error ), mem_flag_names[mem_flag_index] );
+                return error;
+            }
+            image = unprotImage;
+        }
+        
+        inputStream = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), 
+                                     get_explicit_type_size( inputType ) * 4 * imageInfo->width * imageInfo->arraySize, imageValues, &error );
+        test_error( error, "Unable to create input buffer" );
+        
+        // Set arguments
+        error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &inputStream );
+        test_error( error, "Unable to set kernel arguments" );
+        error = clSetKernelArg( kernel, 1, sizeof( cl_mem ), &image );
+        test_error( error, "Unable to set kernel arguments" );
+        
+        // Run the kernel
+        threads[0] = (size_t)imageInfo->width;
+        threads[1] = (size_t)imageInfo->arraySize;
+        
+        error = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL );
+        test_error( error, "Unable to run kernel" );
+        
+        // Get results
+        size_t resultSize = imageInfo->rowPitch * imageInfo->arraySize;
+        clProtectedArray PA(resultSize);
+        char *resultValues = (char *)((void *)PA);
+        
+        if( gDebugTrace )
+            log_info( "    reading results, %ld kbytes\n", (unsigned long)( resultSize / 1024 ) );
+        
+        size_t origin[ 3 ] = { 0, 0, 0 };
+        size_t region[ 3 ] = { imageInfo->width, imageInfo->arraySize, 1 };
+        
+        error = clEnqueueReadImage( queue, image, CL_TRUE, origin, region, 
+                                    gEnablePitch ? imageInfo->rowPitch : 0, gEnablePitch ? imageInfo->slicePitch : 0, resultValues, 0, NULL, NULL );
+        test_error( error, "Unable to read results from kernel" );
+        if( gDebugTrace )
+            log_info( "    results read\n" );
+        
+        // Validate results element by element
+        char *imagePtr = imageValues;
+        int numTries = 5;
+        for( size_t y = 0, i = 0; y < imageInfo->arraySize; y++ )
+        {
+            char *resultPtr = (char *)resultValues + y * imageInfo->rowPitch;
+            for( size_t x = 0; x < imageInfo->width; x++, i++ )
+            {
+                char resultBuffer[ 16 ]; // Largest format would be 4 channels * 4 bytes (32 bits) each
+                
+                // Convert this pixel
+                if( inputType == kFloat )
+                    pack_image_pixel( (float *)imagePtr, imageInfo->format, resultBuffer );
+                else if( inputType == kInt )
+                    pack_image_pixel( (int *)imagePtr, imageInfo->format, resultBuffer );
+                else // if( inputType == kUInt )
+                    pack_image_pixel( (unsigned int *)imagePtr, imageInfo->format, resultBuffer );
+                
+                // Compare against the results
+                if( imageInfo->format->image_channel_data_type == CL_FLOAT )
+                {
+                    // Compare floats
+                    float *expected = (float *)resultBuffer;
+                    float *actual = (float *)resultPtr;
+                    float err = 0.f;
+                    for( unsigned int j = 0; j < get_format_channel_count( imageInfo->format ); j++ )
+                        err += ( expected[ j ] != 0 ) ? fabsf( ( expected[ j ] - actual[ j ] ) / expected[ j ] ) : fabsf( expected[ j ] - actual[ j ] );
+                    
+                    err /= (float)get_format_channel_count( imageInfo->format );
+                    if( err > MAX_ERR )
+                    {
+                        unsigned int *e = (unsigned int *)expected;
+                        unsigned int *a = (unsigned int *)actual;
+                        log_error( "ERROR: Sample %ld (%ld,%ld) did not validate! (%s)\n", i, x, y, mem_flag_names[mem_flag_index] );
+                        log_error( "       Error: %g\n", err );
+                        log_error( "       Expected: %a %a %a %a\n", expected[ 0 ], expected[ 1 ], expected[ 2 ], expected[ 3 ] );
+                        log_error( "       Expected: %08x %08x %08x %08x\n", e[ 0 ], e[ 1 ], e[ 2 ], e[ 3 ] );
+                        log_error( "       Actual:   %a %a %a %a\n", actual[ 0 ], actual[ 1 ], actual[ 2 ], actual[ 3 ] );
+                        log_error( "       Actual:   %08x %08x %08x %08x\n", a[ 0 ], a[ 1 ], a[ 2 ], a[ 3 ] );
+                        totalErrors++;
+                        if( ( --numTries ) == 0 )
+                            return 1;
+                    }
+                }
+                else if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
+                {
+                
+                    // Compare half floats
+                    if( memcmp( resultBuffer, resultPtr, 2 * get_format_channel_count( imageInfo->format ) ) != 0 )
+                    {
+                        cl_ushort *e = (cl_ushort *)resultBuffer;
+                        cl_ushort *a = (cl_ushort *)resultPtr;
+                        int err_cnt = 0;
+
+                        //Fix up cases where we have NaNs
+                        for( size_t j = 0; j < get_format_channel_count( imageInfo->format ); j++ )
+                        {
+                            if( is_half_nan( e[j] ) && is_half_nan(a[j]) )
+                                continue;
+                            if( e[j] != a[j] )
+                                err_cnt++;
+                        }
+
+                        if( err_cnt )
+                        {
+                            totalErrors++;
+                            log_error( "ERROR: Sample %ld (%ld,%ld) did not validate! (%s)\n", i, x, y, mem_flag_names[mem_flag_index] );
+                            log_error( "    Expected: 0x%04x 0x%04x 0x%04x 0x%04x\n", e[0], e[1], e[2], e[3] );
+                            log_error( "    Actual:   0x%04x 0x%04x 0x%04x 0x%04x\n", a[0], a[1], a[2], a[3] );
+                            if( inputType == kFloat )
+                            {
+                                float *p = (float *)(char *)imagePtr;
+                                log_error( "    Source: %a %a %a %a\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] );
+                                log_error( "          : %12.24f %12.24f %12.24f %12.24f\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] );
+                            }
+                            if( ( --numTries ) == 0 )
+                                return 1;
+                        }
+                    }
+                }
+                else
+                {
+                    // Exact result passes every time
+                    if( memcmp( resultBuffer, resultPtr, pixelSize ) != 0 )
+                    { 
+                        // result is inexact.  Calculate error
+                        int failure = 1;
+                        float errors[4] = {NAN, NAN, NAN, NAN};
+                        pack_image_pixel_error( (float *)imagePtr, imageInfo->format, resultBuffer, errors );
+                        
+                        // We are allowed 0.6 absolute error vs. infinitely precise for some normalized formats
+                        if( 0 == forceCorrectlyRoundedWrites    &&
+                           (
+                            imageInfo->format->image_channel_data_type == CL_UNORM_INT8 ||
+                            imageInfo->format->image_channel_data_type == CL_UNORM_INT_101010 ||
+                            imageInfo->format->image_channel_data_type == CL_UNORM_INT16 ||
+                            imageInfo->format->image_channel_data_type == CL_SNORM_INT8 ||
+                            imageInfo->format->image_channel_data_type == CL_SNORM_INT16 
+                            ))
+                        {
+                            if( ! (fabsf( errors[0] ) > 0.6f) && ! (fabsf( errors[1] ) > 0.6f) &&
+                               ! (fabsf( errors[2] ) > 0.6f) && ! (fabsf( errors[3] ) > 0.6f)  )
+                                failure = 0;
+                        }
+                        
+                        
+                        if( failure )
+                        {
+                            totalErrors++;
+                            // Is it our special rounding test?
+                            if( verifyRounding && i >= 1 && i <= 2 )
+                            {
+                                // Try to guess what the rounding mode of the device really is based on what it returned
+                                const char *deviceRounding = "unknown";
+                                unsigned int deviceResults[8];
+                                read_image_pixel<unsigned int>( resultPtr, imageInfo, 0, 0, 0, deviceResults );
+                                read_image_pixel<unsigned int>( resultPtr, imageInfo, 1, 0, 0, &deviceResults[ 4 ] );
+                                
+                                if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 4 && deviceResults[ 3 ] == 4 &&
+                                   deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 5 && deviceResults[ 7 ] == 5 )
+                                    deviceRounding = "truncate";
+                                else if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 5 && deviceResults[ 3 ] == 5 &&
+                                        deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 6 && deviceResults[ 7 ] == 6 )
+                                    deviceRounding = "round to nearest";
+                                else if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 4 && deviceResults[ 3 ] == 5 &&
+                                        deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 6 && deviceResults[ 7 ] == 6 )
+                                    deviceRounding = "round to even";
+                                
+                                log_error( "ERROR: Rounding mode sample (%ld) did not validate, probably due to the device's rounding mode being wrong (%s)\n", i, mem_flag_names[mem_flag_index] );
+                                log_error( "       Actual values rounded by device: %x %x %x %x %x %x %x %x\n", deviceResults[ 0 ], deviceResults[ 1 ], deviceResults[ 2 ], deviceResults[ 3 ], 
+                                          deviceResults[ 4 ], deviceResults[ 5 ], deviceResults[ 6 ], deviceResults[ 7 ] );	
+                                log_error( "       Rounding mode of device appears to be %s\n", deviceRounding );
+                                return 1;
+                            }
+                            log_error( "ERROR: Sample %d (%d,%d) did not validate!\n", (int)i, (int)x, (int)y );
+                            switch(imageInfo->format->image_channel_data_type)
+                            {
+                                case CL_UNORM_INT8:
+                                case CL_SNORM_INT8:
+                                case CL_UNSIGNED_INT8:
+                                case CL_SIGNED_INT8:
+                                    log_error( "    Expected: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultBuffer)[0], ((cl_uchar*)resultBuffer)[1], ((cl_uchar*)resultBuffer)[2], ((cl_uchar*)resultBuffer)[3] );
+                                    log_error( "    Actual:   0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultPtr)[0], ((cl_uchar*)resultPtr)[1], ((cl_uchar*)resultPtr)[2], ((cl_uchar*)resultPtr)[3] );
+                                    log_error( "    Error:    %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
+                                    break;
+                                case CL_UNORM_INT16:
+                                case CL_SNORM_INT16:
+                                case CL_UNSIGNED_INT16:
+                                case CL_SIGNED_INT16:
+#ifdef CL_SFIXED14_APPLE
+                                case CL_SFIXED14_APPLE:
+#endif
+                                    log_error( "    Expected: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultBuffer)[0], ((cl_ushort*)resultBuffer)[1], ((cl_ushort*)resultBuffer)[2], ((cl_ushort*)resultBuffer)[3] );
+                                    log_error( "    Actual:   0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultPtr)[0], ((cl_ushort*)resultPtr)[1], ((cl_ushort*)resultPtr)[2], ((cl_ushort*)resultPtr)[3] );
+                                    log_error( "    Error:    %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
+                                    break;
+                                case CL_HALF_FLOAT:
+                                    log_error( "    Expected: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultBuffer)[0], ((cl_ushort*)resultBuffer)[1], ((cl_ushort*)resultBuffer)[2], ((cl_ushort*)resultBuffer)[3] );
+                                    log_error( "    Actual:   0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultPtr)[0], ((cl_ushort*)resultPtr)[1], ((cl_ushort*)resultPtr)[2], ((cl_ushort*)resultPtr)[3] );
+                                    log_error( "    Ulps:     %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
+                                    break;
+                                case CL_UNSIGNED_INT32:
+                                case CL_SIGNED_INT32:
+                                    log_error( "    Expected: 0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n", ((cl_uint*)resultBuffer)[0], ((cl_uint*)resultBuffer)[1], ((cl_uint*)resultBuffer)[2], ((cl_uint*)resultBuffer)[3] );
+                                    log_error( "    Actual:   0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n", ((cl_uint*)resultPtr)[0], ((cl_uint*)resultPtr)[1], ((cl_uint*)resultPtr)[2], ((cl_uint*)resultPtr)[3] );
+                                    break;
+                                case CL_FLOAT:
+                                    log_error( "    Expected: %a %a %a %a\n", ((cl_float*)resultBuffer)[0], ((cl_float*)resultBuffer)[1], ((cl_float*)resultBuffer)[2], ((cl_float*)resultBuffer)[3] );
+                                    log_error( "    Actual:   %a %a %a %a\n", ((cl_float*)resultPtr)[0], ((cl_float*)resultPtr)[1], ((cl_float*)resultPtr)[2], ((cl_float*)resultPtr)[3] );
+                                    log_error( "    Ulps:     %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
+                                    break;
+                            }
+                            
+                            float *v = (float *)(char *)imagePtr;
+                            log_error( "   src: %g %g %g %g\n", v[ 0 ], v[ 1], v[ 2 ], v[ 3 ] );
+                            log_error( "      : %a %a %a %a\n", v[ 0 ], v[ 1], v[ 2 ], v[ 3 ] );
+                            log_error( "   src: %12.24f %12.24f %12.24f %12.24f\n", v[0 ], v[  1], v[ 2 ], v[ 3 ] );
+                            
+                            if( ( --numTries ) == 0 )
+                                return 1;
+                        }
+                    }
+                }
+                imagePtr += get_explicit_type_size( inputType ) * 4;
+                resultPtr += pixelSize;
+            }
+        }
+    }
+    
+    // All done!
+    return totalErrors;
+}
+
+
+// Builds the 1D image array write kernel for the given input type, then runs test_write_image_1D_array
+// over a set of image sizes selected by the global mode flags: gTestSmallImages, gTestMaxImages,
+// gTestRounding, or (when none is set) NUM_IMAGE_ITERATIONS randomly chosen sizes.
+// Returns 0 when every size passes, otherwise the first non-zero result of test_write_image_1D_array.
+int test_write_image_1D_array_set( cl_device_id device, cl_image_format *format, ExplicitType inputType, MTdata d )
+{
+    char programSrc[10240];
+    const char *ptr;
+    const char *readFormat;
+    clProgramWrapper program;
+    clKernelWrapper kernel;
+    int error;
+    
+    // Get our operating parameters
+    size_t maxWidth, maxArraySize;
+    cl_ulong maxAllocSize, memSize;
+    size_t pixelSize;
+    
+    image_descriptor imageInfo = { 0x0 };
+    imageInfo.format = format;
+    imageInfo.slicePitch = 0;
+    imageInfo.height = imageInfo.depth = 1;
+    imageInfo.type = CL_MEM_OBJECT_IMAGE1D_ARRAY;
+    pixelSize = get_pixel_size( imageInfo.format );
+    
+    // The width of a 1D image array is limited by CL_DEVICE_IMAGE2D_MAX_WIDTH, so that is the query used
+    error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL );
+    error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, sizeof( maxArraySize ), &maxArraySize, NULL );
+    error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL );
+    error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
+    test_error( error, "Unable to get max image 1D array size from device" );
+    
+    // Determine types
+    if( inputType == kInt )
+        readFormat = "i";
+    else if( inputType == kUInt )
+        readFormat = "ui";
+    else // kFloat
+        readFormat = "f";
+    
+    // Construct the source: the pattern takes the input element type name and the write_image suffix
+    sprintf( programSrc, write1DArrayKernelSourcePattern, get_explicit_type_name( inputType ), readFormat );
+    ptr = programSrc;
+    error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "sample_kernel" );
+    test_error( error, "Unable to create testing kernel" );
+    
+    // Run tests
+    if( gTestSmallImages )
+    {
+        for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
+        {
+            imageInfo.rowPitch = imageInfo.width * pixelSize;
+            imageInfo.slicePitch = imageInfo.rowPitch;
+            for( imageInfo.arraySize = 2; imageInfo.arraySize < 9; imageInfo.arraySize++ )
+            {
+                if( gDebugTrace )
+                    log_info( "   at size %d,%d\n", (int)imageInfo.width, (int)imageInfo.arraySize );
+                int retCode = test_write_image_1D_array( device, context, queue, kernel, &imageInfo, inputType, d );
+                if( retCode )
+                    return retCode;
+            }
+        }
+    }
+    else if( gTestMaxImages )
+    {
+        // Try a specific set of maximum sizes
+        size_t numberOfSizes;
+        size_t sizes[100][3];
+        get_max_sizes(&numberOfSizes, 100, sizes, maxWidth, 1, 1, maxArraySize, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE1D_ARRAY, imageInfo.format);
+        
+        for( size_t idx = 0; idx < numberOfSizes; idx++ )
+        {
+            imageInfo.width = sizes[ idx ][ 0 ];
+            imageInfo.arraySize = sizes[ idx ][ 2 ];
+            imageInfo.rowPitch = imageInfo.width * pixelSize;
+            imageInfo.slicePitch = imageInfo.rowPitch;
+            log_info("Testing %d x %d\n", (int)imageInfo.width, (int)imageInfo.arraySize);
+            int retCode = test_write_image_1D_array( device, context, queue, kernel, &imageInfo, inputType, d );
+            if( retCode )
+                return retCode;
+        }
+    }
+    else if( gTestRounding )
+    {
+        // Shift in 64 bits: with a 4-byte channel type a plain int "1 << 32" would be undefined behaviour
+        const cl_ulong typeRange = (cl_ulong)1 << ( get_format_type_size( imageInfo.format ) * 8 );
+        imageInfo.arraySize = (size_t)( typeRange / 256 );
+        imageInfo.width = (size_t)( typeRange / (cl_ulong)imageInfo.arraySize );
+        imageInfo.rowPitch = imageInfo.width * pixelSize;
+        imageInfo.slicePitch = imageInfo.rowPitch;
+        int retCode = test_write_image_1D_array( device, context, queue, kernel, &imageInfo, inputType, d );
+        if( retCode )
+            return retCode;
+    }
+    else
+    {
+        for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
+        {
+            cl_ulong size;
+            // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
+            // image, the result array, plus offset arrays, will fit in the global ram space
+            do
+            {
+                imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, d );
+                imageInfo.arraySize = (size_t)random_log_in_range( 16, (int)maxArraySize / 32, d );
+                imageInfo.rowPitch = imageInfo.width * pixelSize;
+                if( gEnablePitch )
+                {
+                    size_t extraWidth = (int)random_log_in_range( 0, 64, d );
+                    imageInfo.rowPitch += extraWidth * pixelSize;
+                }
+                imageInfo.slicePitch = imageInfo.rowPitch;
+                // Multiply as cl_ulong so the estimate cannot wrap where size_t is only 32 bits wide
+                size = (cl_ulong)imageInfo.rowPitch * (cl_ulong)imageInfo.arraySize * 4;
+            } while(  size > maxAllocSize || ( size * 3 ) > memSize );
+            
+            if( gDebugTrace )
+                log_info( "   at size %d,%d (pitch %d) out of %d,%d\n", (int)imageInfo.width, (int)imageInfo.arraySize, (int)imageInfo.rowPitch, (int)maxWidth, (int)maxArraySize );
+            
+            int retCode = test_write_image_1D_array( device, context, queue, kernel, &imageInfo, inputType, d );
+            if( retCode )
+                return retCode;
+        }
+    }
+    
+    return 0;
+}
--- a/test_conformance/images/kernel_read_write/test_write_2D_array.cpp
+++ b/test_conformance/images/kernel_read_write/test_write_2D_array.cpp
@@ -0,0 +1,509 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../testBase.h"
+
+#if !defined(_WIN32)
+#include <sys/mman.h>
+#endif
+// Threshold for the channel-averaged error (relative, or absolute when the expected value is 0) on CL_FLOAT results
+#define MAX_ERR 0.005f
+// Shared command queue, context and test option flags; only declared here, their definitions are not in view
+extern cl_command_queue queue;
+extern cl_context context;
+extern bool			gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestRounding;
+extern cl_filter_mode	gFilterModeToSkip;
+extern cl_mem_flags gMemFlagsToUse;
+// Result checker declared for this file; its definition is not in view (presumably shared by the write tests - confirm)
+extern int verify_write_results( size_t &i, int &numTries, int &totalErrors, char *&imagePtr, void *resultValues, size_t y, size_t z, 
+                                ExplicitType inputType, image_descriptor *imageInfo, bool verifyRounding );
+// Kernel source template: each work-item writes input[ offset ] to pixel (tidX, tidY, tidZ), offset being its row-major linear index; first %s = input element type, second %s = write_image suffix
+const char *write2DArrayKernelSourcePattern = 
+"__kernel void sample_kernel( __global %s4 *input, write_only image2d_array_t output )\n"
+"{\n"
+"   int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = get_global_id(2);\n"
+"   int offset = tidZ*get_image_width(output)*get_image_height(output) + tidY*get_image_width(output) + tidX;\n"
+"   write_image%s( output, (int4)( tidX, tidY, tidZ, 0 ), input[ offset ] );\n"
+"}";
+
+int test_write_image_2D_array( cl_device_id device, cl_context context, cl_command_queue queue, cl_kernel kernel, 
+                        image_descriptor *imageInfo, ExplicitType inputType, MTdata d )
+{
+    int                 totalErrors = 0;
+    const cl_mem_flags  mem_flag_types[2] = {  CL_MEM_WRITE_ONLY,   CL_MEM_READ_WRITE };
+    const char *        mem_flag_names[2] = { "CL_MEM_WRITE_ONLY", "CL_MEM_READ_WRITE" };
+    
+    for( size_t mem_flag_index = 0; mem_flag_index < sizeof( mem_flag_types ) / sizeof( mem_flag_types[0] ); mem_flag_index++ )
+    {
+        int error;
+        size_t threads[3];
+        bool verifyRounding = false;
+        int totalErrors = 0;
+        int forceCorrectlyRoundedWrites = 0;
+        
+#if defined( __APPLE__ )
+        // Require Apple's CPU implementation to be correctly rounded, not just within 0.6
+        cl_device_type type = 0;
+        if( (error = clGetDeviceInfo( device, CL_DEVICE_TYPE, sizeof( type), &type, NULL )))
+        {
+            log_error("Error: Could not get device type for Apple device! (%d) \n", error );
+            return 1;
+        }
+        if( type == CL_DEVICE_TYPE_CPU )
+            forceCorrectlyRoundedWrites = 1;
+#endif
+        
+        if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
+            if( DetectFloatToHalfRoundingMode(queue) )
+                return 1;
+        
+        clMemWrapper inputStream;
+        BufferOwningPtr<char> maxImageUseHostPtrBackingStore, imageValues;
+        
+        create_random_image_data( inputType, imageInfo, imageValues, d );
+        
+        if( inputType == kFloat && imageInfo->format->image_channel_data_type != CL_FLOAT )
+        {
+            // First, fill with arbitrary floats
+            for( size_t z = 0; z < imageInfo->arraySize; z++ )
+            {
+                for( size_t y = 0; y < imageInfo->height; y++ )
+                {
+                    float *inputValues = (float *)(char*)imageValues + imageInfo->width * y * 4 + imageInfo->height * imageInfo->width * z * 4;
+                    for( size_t i = 0; i < imageInfo->width * 4; i++ )
+                        inputValues[ i ] = get_random_float( -0.1f, 1.1f, d );
+                }
+            }
+            
+            // Throw a few extra test values in there
+            float *inputValues = (float *)(char*)imageValues;
+            size_t i = 0;
+            inputValues[ i++ ] = -0.0000000000009f;
+            inputValues[ i++ ] = 1.f;		
+            inputValues[ i++ ] = -1.f;
+            inputValues[ i++ ] = 2.f;		
+            
+            // Also fill in the first few vectors with some deliberate tests to determine the rounding mode
+            // is correct
+            if( imageInfo->width > 12 )
+            {
+                float formatMax = (float)get_format_max_int( imageInfo->format );
+                inputValues[ i++ ] = 4.0f / formatMax;
+                inputValues[ i++ ] = 4.3f / formatMax;
+                inputValues[ i++ ] = 4.5f / formatMax;
+                inputValues[ i++ ] = 4.7f / formatMax;
+                inputValues[ i++ ] = 5.0f / formatMax;
+                inputValues[ i++ ] = 5.3f / formatMax;
+                inputValues[ i++ ] = 5.5f / formatMax;
+                inputValues[ i++ ] = 5.7f / formatMax;
+                verifyRounding = true;
+            }
+        }
+        else if( inputType == kUInt )
+        {
+            unsigned int *inputValues = (unsigned int*)(char*)imageValues;
+            size_t i = 0;
+            inputValues[ i++ ] = 0;
+            inputValues[ i++ ] = 65535;
+            inputValues[ i++ ] = 7271820;
+            inputValues[ i++ ] = 0;
+        }
+        
+        // Construct testing sources
+        clProtectedImage protImage;
+        clMemWrapper unprotImage;
+        cl_mem image;
+        
+        if( gMemFlagsToUse == CL_MEM_USE_HOST_PTR )
+        {
+            create_random_image_data( inputType, imageInfo, maxImageUseHostPtrBackingStore, d );
+            
+            unprotImage = create_image_2d_array( context, mem_flag_types[mem_flag_index] | CL_MEM_USE_HOST_PTR, imageInfo->format, 
+                                          imageInfo->width, imageInfo->height, imageInfo->arraySize, 0, 0,
+                                          maxImageUseHostPtrBackingStore, &error );    
+            
+            if( error != CL_SUCCESS )
+            {
+                log_error( "ERROR: Unable to create 2D image array of size %ld x %ld x %ld pitch %ld (%s)\n", imageInfo->width, imageInfo->height, imageInfo->arraySize, imageInfo->rowPitch, IGetErrorString( error ) );
+                return error;
+            }
+            
+            image = (cl_mem)unprotImage;
+        }
+        else // Either CL_MEM_ALLOC_HOST_PTR, CL_MEM_COPY_HOST_PTR or none
+        {
+            // Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can be accessed by the host, but otherwise
+            // it works just as if no flag is specified, so we just do the same thing either way
+            // Note: if the flags is really CL_MEM_COPY_HOST_PTR, we want to remove it, because we don't want to copy any incoming data
+            unprotImage = create_image_2d_array( context, mem_flag_types[mem_flag_index] | ( gMemFlagsToUse & ~(CL_MEM_COPY_HOST_PTR) ), imageInfo->format, 
+                                          imageInfo->width, imageInfo->height, imageInfo->arraySize, 0, 0, imageValues, &error );
+            if( error != CL_SUCCESS )
+            {
+                log_error( "ERROR: Unable to create 2D image array of size %ld x %ld x %ld pitch %ld (%s)\n", imageInfo->width, imageInfo->height, imageInfo->arraySize, imageInfo->rowPitch, IGetErrorString( error ) );
+                return error;
+            }
+            image = unprotImage;
+        }
+        
+        inputStream = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), 
+                                     get_explicit_type_size( inputType ) * 4 * imageInfo->width * imageInfo->height * imageInfo->arraySize, imageValues, &error );
+        test_error( error, "Unable to create input buffer" );
+        
+        // Set arguments
+        error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &inputStream );
+        test_error( error, "Unable to set kernel arguments" );
+        error = clSetKernelArg( kernel, 1, sizeof( cl_mem ), &image );
+        test_error( error, "Unable to set kernel arguments" );
+        
+        // Run the kernel
+        threads[0] = (size_t)imageInfo->width;
+        threads[1] = (size_t)imageInfo->height;
+        threads[2] = (size_t)imageInfo->arraySize;
+        error = clEnqueueNDRangeKernel( queue, kernel, 3, NULL, threads, NULL, NULL, 0, NULL );
+        test_error( error, "Unable to run kernel" );
+        
+        // Get results
+        size_t resultSize = imageInfo->slicePitch *imageInfo->arraySize;
+        clProtectedArray PA(resultSize);
+        char *resultValues = (char *)((void *)PA);
+        
+        if( gDebugTrace )
+            log_info( "    reading results, %ld kbytes\n", (unsigned long)( resultSize / 1024 ) );
+        
+        size_t origin[ 3 ] = { 0, 0, 0 };
+        size_t region[ 3 ] = { imageInfo->width, imageInfo->height, imageInfo->arraySize };
+        
+        error = clEnqueueReadImage( queue, image, CL_TRUE, origin, region, gEnablePitch ? imageInfo->rowPitch : 0, gEnablePitch ? imageInfo->slicePitch : 0, resultValues, 0, NULL, NULL );
+        test_error( error, "Unable to read results from kernel" );
+        if( gDebugTrace )
+            log_info( "    results read\n" );
+        
+        // Validate results element by element
+        char *imagePtr = imageValues;
+        int numTries = 5;
+        for( size_t z = 0, i = 0; z < imageInfo->arraySize; z++ )
+        {
+            for( size_t y = 0; y < imageInfo->height; y++ )
+            {
+                char *resultPtr = (char *)resultValues + y * imageInfo->rowPitch + z * imageInfo->slicePitch;
+                for( size_t x = 0; x < imageInfo->width; x++, i++ )
+                {
+                    char resultBuffer[ 16 ]; // Largest format would be 4 channels * 4 bytes (32 bits) each
+                    
+                    // Convert this pixel
+                    if( inputType == kFloat )
+                        pack_image_pixel( (float *)imagePtr, imageInfo->format, resultBuffer );
+                    else if( inputType == kInt )
+                        pack_image_pixel( (int *)imagePtr, imageInfo->format, resultBuffer );
+                    else // if( inputType == kUInt )
+                        pack_image_pixel( (unsigned int *)imagePtr, imageInfo->format, resultBuffer );
+                    
+                    // Compare against the results
+                    if( imageInfo->format->image_channel_data_type == CL_FLOAT )
+                    {
+                        // Compare floats
+                        float *expected = (float *)resultBuffer;
+                        float *actual = (float *)resultPtr;
+                        float err = 0.f;
+                        for( unsigned int j = 0; j < get_format_channel_count( imageInfo->format ); j++ )
+                            err += ( expected[ j ] != 0 ) ? fabsf( ( expected[ j ] - actual[ j ] ) / expected[ j ] ) : fabsf( expected[ j ] - actual[ j ] );
+                        
+                        err /= (float)get_format_channel_count( imageInfo->format );
+                        if( err > MAX_ERR )
+                        {
+                            unsigned int *e = (unsigned int *)expected;
+                            unsigned int *a = (unsigned int *)actual;
+                            log_error( "ERROR: Sample %ld (%ld,%ld) did not validate! (%s)\n", i, x, y, mem_flag_names[mem_flag_index] );
+                            log_error( "       Error: %g\n", err );
+                            log_error( "       Expected: %a %a %a %a\n", expected[ 0 ], expected[ 1 ], expected[ 2 ], expected[ 3 ] );
+                            log_error( "       Expected: %08x %08x %08x %08x\n", e[ 0 ], e[ 1 ], e[ 2 ], e[ 3 ] );
+                            log_error( "       Actual:   %a %a %a %a\n", actual[ 0 ], actual[ 1 ], actual[ 2 ], actual[ 3 ] );
+                            log_error( "       Actual:   %08x %08x %08x %08x\n", a[ 0 ], a[ 1 ], a[ 2 ], a[ 3 ] );
+                            totalErrors++;
+                            if( ( --numTries ) == 0 )
+                                return 1;
+                        }
+                    }
+                    else if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
+                    {
+                        // Compare half floats
+                        if( memcmp( resultBuffer, resultPtr, 2 * get_format_channel_count( imageInfo->format ) ) != 0 )
+                        {
+                            totalErrors++;
+                            log_error( "ERROR: Sample %ld (%ld,%ld) did not validate! (%s)\n", i, x, y, mem_flag_names[mem_flag_index] );
+                            unsigned short *e = (unsigned short *)resultBuffer;
+                            unsigned short *a = (unsigned short *)resultPtr;
+                            log_error( "    Expected: 0x%04x 0x%04x 0x%04x 0x%04x\n", e[0], e[1], e[2], e[3] );
+                            log_error( "    Actual:   0x%04x 0x%04x 0x%04x 0x%04x\n", a[0], a[1], a[2], a[3] );
+                            if( inputType == kFloat )
+                            {
+                                float *p = (float *)(char *)imagePtr;
+                                log_error( "    Source: %a %a %a %a\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] );
+                                log_error( "          : %12.24f %12.24f %12.24f %12.24f\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] );
+                            }
+                            if( ( --numTries ) == 0 )
+                                return 1;
+                        }
+                    }
+                    else
+                    {
+                        // Exact result passes every time
+                        if( memcmp( resultBuffer, resultPtr, get_pixel_size( imageInfo->format ) ) != 0 )
+                        { 
+                            // result is inexact.  Calculate error
+                            int failure = 1;
+                            float errors[4] = {NAN, NAN, NAN, NAN};
+                            pack_image_pixel_error( (float *)imagePtr, imageInfo->format, resultBuffer, errors );
+                            
+                            // We are allowed 0.6 absolute error vs. infinitely precise for some normalized formats
+                            if( 0 == forceCorrectlyRoundedWrites    &&
+                               (
+                                imageInfo->format->image_channel_data_type == CL_UNORM_INT8 ||
+                                imageInfo->format->image_channel_data_type == CL_UNORM_INT_101010 ||
+                                imageInfo->format->image_channel_data_type == CL_UNORM_INT16 ||
+                                imageInfo->format->image_channel_data_type == CL_SNORM_INT8 ||
+                                imageInfo->format->image_channel_data_type == CL_SNORM_INT16 
+                                ))
+                            {
+                                if( ! (fabsf( errors[0] ) > 0.6f) && ! (fabsf( errors[1] ) > 0.6f) &&
+                                   ! (fabsf( errors[2] ) > 0.6f) && ! (fabsf( errors[3] ) > 0.6f)  )
+                                    failure = 0;
+                            }
+                            
+                            
+                            if( failure )
+                            {
+                                totalErrors++;
+                                // Is it our special rounding test?
+                                if( verifyRounding && i >= 1 && i <= 2 )
+                                {
+                                    // Try to guess what the rounding mode of the device really is based on what it returned
+                                    const char *deviceRounding = "unknown";
+                                    unsigned int deviceResults[8];
+                                    read_image_pixel<unsigned int>( resultPtr, imageInfo, 0, 0, 0, deviceResults );
+                                    read_image_pixel<unsigned int>( resultPtr, imageInfo, 1, 0, 0, &deviceResults[ 4 ] );
+                                    
+                                    if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 4 && deviceResults[ 3 ] == 4 &&
+                                       deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 5 && deviceResults[ 7 ] == 5 )
+                                        deviceRounding = "truncate";
+                                    else if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 5 && deviceResults[ 3 ] == 5 &&
+                                            deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 6 && deviceResults[ 7 ] == 6 )
+                                        deviceRounding = "round to nearest";
+                                    else if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 4 && deviceResults[ 3 ] == 5 &&
+                                            deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 6 && deviceResults[ 7 ] == 6 )
+                                        deviceRounding = "round to even";
+                                    
+                                    log_error( "ERROR: Rounding mode sample (%ld) did not validate, probably due to the device's rounding mode being wrong (%s)\n", i, mem_flag_names[mem_flag_index] );
+                                    log_error( "       Actual values rounded by device: %d %d %d %d %d %d %d %d\n", deviceResults[ 0 ], deviceResults[ 1 ], deviceResults[ 2 ], deviceResults[ 3 ], 
+                                              deviceResults[ 4 ], deviceResults[ 5 ], deviceResults[ 6 ], deviceResults[ 7 ] );	
+                                    log_error( "       Rounding mode of device appears to be %s\n", deviceRounding );
+                                    return 1;
+                                }
+                                log_error( "ERROR: Sample %d (%d,%d) did not validate!\n", (int)i, (int)x, (int)y );
+                                switch(imageInfo->format->image_channel_data_type)
+                                {
+                                    case CL_UNORM_INT8:
+                                    case CL_SNORM_INT8:
+                                    case CL_UNSIGNED_INT8:
+                                    case CL_SIGNED_INT8:
+                                        log_error( "    Expected: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultBuffer)[0], ((cl_uchar*)resultBuffer)[1], ((cl_uchar*)resultBuffer)[2], ((cl_uchar*)resultBuffer)[3] );
+                                        log_error( "    Actual:   0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultPtr)[0], ((cl_uchar*)resultPtr)[1], ((cl_uchar*)resultPtr)[2], ((cl_uchar*)resultPtr)[3] );
+                                        log_error( "    Error:    %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
+                                        break;
+                                    case CL_UNORM_INT16:
+                                    case CL_SNORM_INT16:
+                                    case CL_UNSIGNED_INT16:
+                                    case CL_SIGNED_INT16:
+#ifdef CL_SFIXED14_APPLE
+                                    case CL_SFIXED14_APPLE:
+#endif
+                                        log_error( "    Expected: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultBuffer)[0], ((cl_ushort*)resultBuffer)[1], ((cl_ushort*)resultBuffer)[2], ((cl_ushort*)resultBuffer)[3] );
+                                        log_error( "    Actual:   0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultPtr)[0], ((cl_ushort*)resultPtr)[1], ((cl_ushort*)resultPtr)[2], ((cl_ushort*)resultPtr)[3] );
+                                        log_error( "    Error:    %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
+                                        break;
+                                    case CL_HALF_FLOAT:
+                                        log_error( "    Expected: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultBuffer)[0], ((cl_ushort*)resultBuffer)[1], ((cl_ushort*)resultBuffer)[2], ((cl_ushort*)resultBuffer)[3] );
+                                        log_error( "    Actual:   0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultPtr)[0], ((cl_ushort*)resultPtr)[1], ((cl_ushort*)resultPtr)[2], ((cl_ushort*)resultPtr)[3] );
+                                        log_error( "    Ulps:     %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
+                                        break;
+                                    case CL_UNSIGNED_INT32:
+                                    case CL_SIGNED_INT32:
+                                        log_error( "    Expected: 0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n", ((cl_uint*)resultBuffer)[0], ((cl_uint*)resultBuffer)[1], ((cl_uint*)resultBuffer)[2], ((cl_uint*)resultBuffer)[3] );
+                                        log_error( "    Actual:   0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n", ((cl_uint*)resultPtr)[0], ((cl_uint*)resultPtr)[1], ((cl_uint*)resultPtr)[2], ((cl_uint*)resultPtr)[3] );
+                                        break;
+                                    case CL_FLOAT:
+                                        log_error( "    Expected: %a %a %a %a\n", ((cl_float*)resultBuffer)[0], ((cl_float*)resultBuffer)[1], ((cl_float*)resultBuffer)[2], ((cl_float*)resultBuffer)[3] );
+                                        log_error( "    Actual:   %a %a %a %a\n", ((cl_float*)resultPtr)[0], ((cl_float*)resultPtr)[1], ((cl_float*)resultPtr)[2], ((cl_float*)resultPtr)[3] );
+                                        log_error( "    Ulps:     %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
+                                        break;
+                                }
+                                
+                                float *v = (float *)(char *)imagePtr;
+                                log_error( "   src: %g %g %g %g\n", v[ 0 ], v[ 1], v[ 2 ], v[ 3 ] );
+                                log_error( "      : %a %a %a %a\n", v[ 0 ], v[ 1], v[ 2 ], v[ 3 ] );
+                                log_error( "   src: %12.24f %12.24f %12.24f %12.24f\n", v[0 ], v[  1], v[ 2 ], v[ 3 ] );
+                                
+                                if( ( --numTries ) == 0 )
+                                    return 1;
+                            }
+                        }
+                    }
+                    imagePtr += get_explicit_type_size( inputType ) * 4;
+                    resultPtr += get_pixel_size( imageInfo->format );
+                }
+            }
+        }
+    }
+    // All done!
+    return totalErrors;
+}
+
+
+// Builds the 2D image array write kernel for one format / input type and runs
+// test_write_image_2D_array over the sizes picked by the global test mode
+// (gTestSmallImages, gTestMaxImages, gTestRounding, otherwise random sizes).
+// inputType picks the format suffix ("i", "ui" or "f") for the kernel source; d is
+// the random state for size selection. Returns 0, or the first failing size's result.
+int test_write_image_2D_array_set( cl_device_id device, cl_image_format *format, ExplicitType inputType, MTdata d )
+{
+    char programSrc[10240];
+    const char *ptr;
+    const char *readFormat;
+    clProgramWrapper program;
+    clKernelWrapper kernel;
+    int error;
+
+    // Get our operating parameters
+    size_t maxWidth, maxHeight, maxArraySize;
+    cl_ulong maxAllocSize, memSize;
+    image_descriptor imageInfo = { 0x0 };
+    imageInfo.format = format;
+    imageInfo.type = CL_MEM_OBJECT_IMAGE2D_ARRAY;
+    imageInfo.depth = 1;
+    imageInfo.slicePitch = 0;
+
+    error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL );
+    error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL );
+    error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, sizeof( maxArraySize ), &maxArraySize, NULL );
+    error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL );
+    error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
+    test_error( error, "Unable to get max image 2D array size from device" );
+
+    // Determine types
+    if( inputType == kInt )
+        readFormat = "i";
+    else if( inputType == kUInt )
+        readFormat = "ui";
+    else // kFloat
+        readFormat = "f";
+
+    // Construct the source
+    sprintf( programSrc, write2DArrayKernelSourcePattern, get_explicit_type_name( inputType ), readFormat );
+    ptr = programSrc;
+    error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "sample_kernel" );
+    test_error( error, "Unable to create testing kernel" );
+
+    // Run tests
+    if( gTestSmallImages )
+    {
+        for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
+        {
+            imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
+            for( imageInfo.height = 1; imageInfo.height < 9; imageInfo.height++ )
+            {
+                imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
+                for( imageInfo.arraySize = 2; imageInfo.arraySize < 7; imageInfo.arraySize++ )
+                {
+                    if( gDebugTrace )
+                        log_info( "   at size %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.arraySize );
+                    int retCode = test_write_image_2D_array( device, context, queue, kernel, &imageInfo, inputType, d );
+                    if( retCode )
+                        return retCode;
+                }
+            }
+        }
+    }
+    else if( gTestMaxImages )
+    {
+        // Try a specific set of maximum sizes
+        size_t numberOfSizes;
+        size_t sizes[100][3];
+        get_max_sizes(&numberOfSizes, 100, sizes, maxWidth, maxHeight, 1, maxArraySize, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE2D_ARRAY, imageInfo.format);
+        for( size_t idx = 0; idx < numberOfSizes; idx++ )
+        {
+            imageInfo.width = sizes[ idx ][ 0 ];
+            imageInfo.height = sizes[ idx ][ 1 ];
+            imageInfo.arraySize = sizes[ idx ][ 2 ];
+            imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
+            imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
+            log_info("Testing %d x %d x %d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.arraySize);
+            int retCode = test_write_image_2D_array( device, context, queue, kernel, &imageInfo, inputType, d );
+            if( retCode )
+                return retCode;
+        }
+    }
+    else if( gTestRounding )
+    {
+        // Shift in 64 bits: with a 4-byte channel type the shift count is 32, which
+        // would be undefined behavior on a plain int operand.
+        cl_ulong typeRange = (cl_ulong)1 << ( get_format_type_size( imageInfo.format ) * 8 );
+        imageInfo.height = (size_t)( typeRange / 256 );
+        imageInfo.width = (size_t)( typeRange / (cl_ulong)imageInfo.height );
+        imageInfo.arraySize = 2;
+        imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
+        imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
+        int retCode = test_write_image_2D_array( device, context, queue, kernel, &imageInfo, inputType, d );
+        if( retCode )
+            return retCode;
+    }
+    else
+    {
+        for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
+        {
+            cl_ulong size;
+            // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
+            // image, the result array, plus offset arrays, will fit in the global ram space
+            do
+            {
+                imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, d );
+                imageInfo.height = (size_t)random_log_in_range( 16, (int)maxHeight / 32, d );
+                imageInfo.arraySize = (size_t)random_log_in_range( 16, (int)maxArraySize / 32, d );
+                imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
+                imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
+                if( gEnablePitch )
+                {
+                    size_t extraWidth = (int)random_log_in_range( 0, 64, d );
+                    imageInfo.rowPitch += extraWidth * get_pixel_size( imageInfo.format );
+                    imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
+                    extraWidth = (int)random_log_in_range( 0, 64, d );
+                    imageInfo.slicePitch += extraWidth * imageInfo.rowPitch;
+                }
+                // Widen before multiplying so the byte count cannot wrap when size_t is 32 bits
+                size = (cl_ulong)imageInfo.slicePitch * (cl_ulong)imageInfo.arraySize * 4;
+            } while(  size > maxAllocSize || ( size * 3 ) > memSize );
+
+            // size_t is not long on every platform (e.g. 64-bit Windows); cast so the arguments match %ld
+            if( gDebugTrace )
+                log_info( "   at size %ld,%ld,%ld (pitch %ld, slice %ld) out of %ld,%ld,%ld\n", (long)imageInfo.width, (long)imageInfo.height, (long)imageInfo.arraySize,
+                         (long)imageInfo.rowPitch, (long)imageInfo.slicePitch, (long)maxWidth, (long)maxHeight, (long)maxArraySize );
+
+            int retCode = test_write_image_2D_array( device, context, queue, kernel, &imageInfo, inputType, d );
+            if( retCode )
+                return retCode;
+        }
+    }
+
+    return 0;
+}
--- a/test_conformance/images/kernel_read_write/test_write_3D.cpp
+++ b/test_conformance/images/kernel_read_write/test_write_3D.cpp
@@ -0,0 +1,508 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../testBase.h"
+
+#if !defined(_WIN32)
+#include <sys/mman.h>
+#endif
+
+#define MAX_ERR 0.005f   // CL_FLOAT results fail when the per-channel error, averaged over the channels, exceeds this
+
+extern cl_command_queue queue;   // defined elsewhere; test_write_image_3D takes its own queue parameter of the same name
+extern cl_context context;       // defined elsewhere; test_write_image_3D takes its own context parameter of the same name
+extern bool			gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestRounding;   // global test-mode switches, defined elsewhere
+extern cl_filter_mode	gFilterModeToSkip;   // NOTE(review): no use visible in this part of the file - confirm it is needed
+extern cl_mem_flags gMemFlagsToUse;   // host-pointer flag choice; CL_MEM_USE_HOST_PTR selects the host-backed image path
+
+extern int verify_write_results( size_t &i, int &numTries, int &totalErrors, char *&imagePtr, void *resultValues, size_t y, size_t z, 
+                                ExplicitType inputType, image_descriptor *imageInfo, bool verifyRounding );   // NOTE(review): declared only; no call visible in this part of the file
+
+const char *write3DKernelSourcePattern = // kernel source template with two %s fields (filled in by a caller not visible here)
+"#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable\n"
+"__kernel void sample_kernel( __global %s4 *input, write_only image3d_t output )\n" // 1st %s: scalar type name of the 4-component input vectors
+"{\n"
+"   int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = get_global_id(2);\n"
+"   int offset = tidZ*get_image_width(output)*get_image_height(output) + tidY*get_image_width(output) + tidX;\n" // input is indexed as a tightly packed array, x varying fastest
+"   write_image%s( output, (int4)( tidX, tidY, tidZ, 0 ), input[ offset ] );\n" // 2nd %s: suffix appended to the write_image built-in name
+"}";
+
+int test_write_image_3D( cl_device_id device, cl_context context, cl_command_queue queue, cl_kernel kernel, 
+                        image_descriptor *imageInfo, ExplicitType inputType, MTdata d )
+{
+    int                 totalErrors = 0;
+    const cl_mem_flags  mem_flag_types[2] = {  CL_MEM_WRITE_ONLY,   CL_MEM_READ_WRITE };
+    const char *        mem_flag_names[2] = { "CL_MEM_WRITE_ONLY", "CL_MEM_READ_WRITE" };
+    
+    for( size_t mem_flag_index = 0; mem_flag_index < sizeof( mem_flag_types ) / sizeof( mem_flag_types[0] ); mem_flag_index++ )
+    {
+        int error;
+        size_t threads[3];
+        bool verifyRounding = false;
+        int totalErrors = 0;
+        int forceCorrectlyRoundedWrites = 0;
+        
+#if defined( __APPLE__ )
+        // Require Apple's CPU implementation to be correctly rounded, not just within 0.6
+        cl_device_type type = 0;
+        if( (error = clGetDeviceInfo( device, CL_DEVICE_TYPE, sizeof( type), &type, NULL )))
+        {
+            log_error("Error: Could not get device type for Apple device! (%d) \n", error );
+            return 1;
+        }
+        if( type == CL_DEVICE_TYPE_CPU )
+            forceCorrectlyRoundedWrites = 1;
+#endif
+        
+        if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
+            if( DetectFloatToHalfRoundingMode(queue) )
+                return 1;
+        
+        clMemWrapper inputStream;
+        BufferOwningPtr<char> maxImageUseHostPtrBackingStore, imageValues;
+        
+        create_random_image_data( inputType, imageInfo, imageValues, d );
+        
+        if( inputType == kFloat && imageInfo->format->image_channel_data_type != CL_FLOAT )
+        {
+            // First, fill with arbitrary floats
+            for( size_t z = 0; z < imageInfo->depth; z++ )
+            {
+                for( size_t y = 0; y < imageInfo->height; y++ )
+                {
+                    float *inputValues = (float *)(char*)imageValues + imageInfo->width * y * 4 + imageInfo->height * imageInfo->width * z * 4;
+                    for( size_t i = 0; i < imageInfo->width * 4; i++ )
+                        inputValues[ i ] = get_random_float( -0.1f, 1.1f, d );
+                }
+            }
+            
+            // Throw a few extra test values in there
+            float *inputValues = (float *)(char*)imageValues;
+            size_t i = 0;
+            inputValues[ i++ ] = -0.0000000000009f;
+            inputValues[ i++ ] = 1.f;		
+            inputValues[ i++ ] = -1.f;
+            inputValues[ i++ ] = 2.f;		
+            
+            // Also fill in the first few vectors with some deliberate tests to determine the rounding mode
+            // is correct
+            if( imageInfo->width > 12 )
+            {
+                float formatMax = (float)get_format_max_int( imageInfo->format );
+                inputValues[ i++ ] = 4.0f / formatMax;
+                inputValues[ i++ ] = 4.3f / formatMax;
+                inputValues[ i++ ] = 4.5f / formatMax;
+                inputValues[ i++ ] = 4.7f / formatMax;
+                inputValues[ i++ ] = 5.0f / formatMax;
+                inputValues[ i++ ] = 5.3f / formatMax;
+                inputValues[ i++ ] = 5.5f / formatMax;
+                inputValues[ i++ ] = 5.7f / formatMax;
+                verifyRounding = true;
+            }
+        }
+        else if( inputType == kUInt )
+        {
+            unsigned int *inputValues = (unsigned int*)(char*)imageValues;
+            size_t i = 0;
+            inputValues[ i++ ] = 0;
+            inputValues[ i++ ] = 65535;
+            inputValues[ i++ ] = 7271820;
+            inputValues[ i++ ] = 0;
+        }
+        
+        // Construct testing sources
+        clProtectedImage protImage;
+        clMemWrapper unprotImage;
+        cl_mem image;
+        
+        if( gMemFlagsToUse == CL_MEM_USE_HOST_PTR )
+        {
+            create_random_image_data( inputType, imageInfo, maxImageUseHostPtrBackingStore, d );
+            
+            unprotImage = create_image_3d( context, mem_flag_types[mem_flag_index] | CL_MEM_USE_HOST_PTR, imageInfo->format, 
+                                          imageInfo->width, imageInfo->height, imageInfo->depth, 0, 0,
+                                          maxImageUseHostPtrBackingStore, &error );    
+            
+            if( error != CL_SUCCESS )
+            {
+                log_error( "ERROR: Unable to create 3D image of size %ld x %ld x %ld pitch %ld (%s)\n", imageInfo->width, imageInfo->height, imageInfo->depth, imageInfo->rowPitch, IGetErrorString( error ) );
+                return error;
+            }
+            
+            image = (cl_mem)unprotImage;
+        }
+        else // Either CL_MEM_ALLOC_HOST_PTR, CL_MEM_COPY_HOST_PTR or none
+        {
+            // Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can be accessed by the host, but otherwise
+            // it works just as if no flag is specified, so we just do the same thing either way
+            // Note: if the flags is really CL_MEM_COPY_HOST_PTR, we want to remove it, because we don't want to copy any incoming data
+            unprotImage = create_image_3d( context, mem_flag_types[mem_flag_index] | ( gMemFlagsToUse & ~(CL_MEM_COPY_HOST_PTR) ), imageInfo->format, 
+                                          imageInfo->width, imageInfo->height, imageInfo->depth, 0, 0, imageValues, &error );
+            if( error != CL_SUCCESS )
+            {
+                log_error( "ERROR: Unable to create 3D image of size %ld x %ld x %ld pitch %ld (%s)\n", imageInfo->width, imageInfo->height, imageInfo->depth, imageInfo->rowPitch, IGetErrorString( error ) );
+                return error;
+            }
+            image = unprotImage;
+        }
+        
+        inputStream = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), 
+                                     get_explicit_type_size( inputType ) * 4 * imageInfo->width * imageInfo->height * imageInfo->depth, imageValues, &error );
+        test_error( error, "Unable to create input buffer" );
+        
+        // Set arguments
+        error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &inputStream );
+        test_error( error, "Unable to set kernel arguments" );
+        error = clSetKernelArg( kernel, 1, sizeof( cl_mem ), &image );
+        test_error( error, "Unable to set kernel arguments" );
+        
+        // Run the kernel
+        threads[0] = (size_t)imageInfo->width;
+        threads[1] = (size_t)imageInfo->height;
+        threads[2] = (size_t)imageInfo->depth;
+        error = clEnqueueNDRangeKernel( queue, kernel, 3, NULL, threads, NULL, NULL, 0, NULL );
+        test_error( error, "Unable to run kernel" );
+        
+        // Get results
+        size_t resultSize = imageInfo->slicePitch *imageInfo->depth;
+        clProtectedArray PA(resultSize);
+        char *resultValues = (char *)((void *)PA);
+        
+        if( gDebugTrace )
+            log_info( "    reading results, %ld kbytes\n", (unsigned long)( resultSize / 1024 ) );
+        
+        size_t origin[ 3 ] = { 0, 0, 0 };
+        size_t region[ 3 ] = { imageInfo->width, imageInfo->height, imageInfo->depth };
+        
+        error = clEnqueueReadImage( queue, image, CL_TRUE, origin, region, gEnablePitch ? imageInfo->rowPitch : 0, gEnablePitch ? imageInfo->slicePitch : 0, resultValues, 0, NULL, NULL );
+        test_error( error, "Unable to read results from kernel" );
+        if( gDebugTrace )
+            log_info( "    results read\n" );
+        
+        // Validate results element by element
+        char *imagePtr = imageValues;
+        int numTries = 5;
+        for( size_t z = 0, i = 0; z < imageInfo->depth; z++ )
+        {
+            for( size_t y = 0; y < imageInfo->height; y++ )
+            {
+                char *resultPtr = (char *)resultValues + y * imageInfo->rowPitch + z * imageInfo->slicePitch;
+                for( size_t x = 0; x < imageInfo->width; x++, i++ )
+                {
+                    char resultBuffer[ 16 ]; // Largest format would be 4 channels * 4 bytes (32 bits) each
+                    
+                    // Convert this pixel
+                    if( inputType == kFloat )
+                        pack_image_pixel( (float *)imagePtr, imageInfo->format, resultBuffer );
+                    else if( inputType == kInt )
+                        pack_image_pixel( (int *)imagePtr, imageInfo->format, resultBuffer );
+                    else // if( inputType == kUInt )
+                        pack_image_pixel( (unsigned int *)imagePtr, imageInfo->format, resultBuffer );
+                    
+                    // Compare against the results
+                    if( imageInfo->format->image_channel_data_type == CL_FLOAT )
+                    {
+                        // Compare floats
+                        float *expected = (float *)resultBuffer;
+                        float *actual = (float *)resultPtr;
+                        float err = 0.f;
+                        for( unsigned int j = 0; j < get_format_channel_count( imageInfo->format ); j++ )
+                            err += ( expected[ j ] != 0 ) ? fabsf( ( expected[ j ] - actual[ j ] ) / expected[ j ] ) : fabsf( expected[ j ] - actual[ j ] );
+                        
+                        err /= (float)get_format_channel_count( imageInfo->format );
+                        if( err > MAX_ERR )
+                        {
+                            unsigned int *e = (unsigned int *)expected;
+                            unsigned int *a = (unsigned int *)actual;
+                            log_error( "ERROR: Sample %ld (%ld,%ld) did not validate! (%s)\n", i, x, y, mem_flag_names[mem_flag_index] );
+                            log_error( "       Error: %g\n", err );
+                            log_error( "       Expected: %a %a %a %a\n", expected[ 0 ], expected[ 1 ], expected[ 2 ], expected[ 3 ] );
+                            log_error( "       Expected: %08x %08x %08x %08x\n", e[ 0 ], e[ 1 ], e[ 2 ], e[ 3 ] );
+                            log_error( "       Actual:   %a %a %a %a\n", actual[ 0 ], actual[ 1 ], actual[ 2 ], actual[ 3 ] );
+                            log_error( "       Actual:   %08x %08x %08x %08x\n", a[ 0 ], a[ 1 ], a[ 2 ], a[ 3 ] );
+                            totalErrors++;
+                            if( ( --numTries ) == 0 )
+                                return 1;
+                        }
+                    }
+                    else if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
+                    {
+                        // Compare half floats
+                        if( memcmp( resultBuffer, resultPtr, 2 * get_format_channel_count( imageInfo->format ) ) != 0 )
+                        {
+                            totalErrors++;
+                            log_error( "ERROR: Sample %ld (%ld,%ld) did not validate! (%s)\n", i, x, y, mem_flag_names[mem_flag_index] );
+                            unsigned short *e = (unsigned short *)resultBuffer;
+                            unsigned short *a = (unsigned short *)resultPtr;
+                            log_error( "    Expected: 0x%04x 0x%04x 0x%04x 0x%04x\n", e[0], e[1], e[2], e[3] );
+                            log_error( "    Actual:   0x%04x 0x%04x 0x%04x 0x%04x\n", a[0], a[1], a[2], a[3] );
+                            if( inputType == kFloat )
+                            {
+                                float *p = (float *)(char *)imagePtr;
+                                log_error( "    Source: %a %a %a %a\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] );
+                                log_error( "          : %12.24f %12.24f %12.24f %12.24f\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] );
+                            }
+                            if( ( --numTries ) == 0 )
+                                return 1;
+                        }
+                    }
+                    else
+                    {
+                        // Exact result passes every time
+                        if( memcmp( resultBuffer, resultPtr, get_pixel_size( imageInfo->format ) ) != 0 )
+                        { 
+                            // result is inexact.  Calculate error
+                            int failure = 1;
+                            float errors[4] = {NAN, NAN, NAN, NAN};
+                            pack_image_pixel_error( (float *)imagePtr, imageInfo->format, resultBuffer, errors );
+                            
+                            // We are allowed 0.6 absolute error vs. infinitely precise for some normalized formats
+                            if( 0 == forceCorrectlyRoundedWrites    &&
+                               (
+                                imageInfo->format->image_channel_data_type == CL_UNORM_INT8 ||
+                                imageInfo->format->image_channel_data_type == CL_UNORM_INT_101010 ||
+                                imageInfo->format->image_channel_data_type == CL_UNORM_INT16 ||
+                                imageInfo->format->image_channel_data_type == CL_SNORM_INT8 ||
+                                imageInfo->format->image_channel_data_type == CL_SNORM_INT16 
+                                ))
+                            {
+                                if( ! (fabsf( errors[0] ) > 0.6f) && ! (fabsf( errors[1] ) > 0.6f) &&
+                                   ! (fabsf( errors[2] ) > 0.6f) && ! (fabsf( errors[3] ) > 0.6f)  )
+                                    failure = 0;
+                            }
+                            
+                            
+                            if( failure )
+                            {
+                                totalErrors++;
+                                // Is it our special rounding test?
+                                if( verifyRounding && i >= 1 && i <= 2 )
+                                {
+                                    // Try to guess what the rounding mode of the device really is based on what it returned
+                                    const char *deviceRounding = "unknown";
+                                    unsigned int deviceResults[8];
+                                    read_image_pixel<unsigned int>( resultPtr, imageInfo, 0, 0, 0, deviceResults );
+                                    read_image_pixel<unsigned int>( resultPtr, imageInfo, 1, 0, 0, &deviceResults[ 4 ] );
+                                    
+                                    if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 4 && deviceResults[ 3 ] == 4 &&
+                                       deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 5 && deviceResults[ 7 ] == 5 )
+                                        deviceRounding = "truncate";
+                                    else if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 5 && deviceResults[ 3 ] == 5 &&
+                                            deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 6 && deviceResults[ 7 ] == 6 )
+                                        deviceRounding = "round to nearest";
+                                    else if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 4 && deviceResults[ 3 ] == 5 &&
+                                            deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 6 && deviceResults[ 7 ] == 6 )
+                                        deviceRounding = "round to even";
+                                    
+                                    log_error( "ERROR: Rounding mode sample (%ld) did not validate, probably due to the device's rounding mode being wrong (%s)\n", i, mem_flag_names[mem_flag_index] );
+                                    log_error( "       Actual values rounded by device: %d %d %d %d %d %d %d %d\n", deviceResults[ 0 ], deviceResults[ 1 ], deviceResults[ 2 ], deviceResults[ 3 ], 
+                                              deviceResults[ 4 ], deviceResults[ 5 ], deviceResults[ 6 ], deviceResults[ 7 ] );	
+                                    log_error( "       Rounding mode of device appears to be %s\n", deviceRounding );
+                                    return 1;
+                                }
+                                log_error( "ERROR: Sample %d (%d,%d) did not validate!\n", (int)i, (int)x, (int)y );
+                                switch(imageInfo->format->image_channel_data_type)
+                                {
+                                    case CL_UNORM_INT8:
+                                    case CL_SNORM_INT8:
+                                    case CL_UNSIGNED_INT8:
+                                    case CL_SIGNED_INT8:
+                                        log_error( "    Expected: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultBuffer)[0], ((cl_uchar*)resultBuffer)[1], ((cl_uchar*)resultBuffer)[2], ((cl_uchar*)resultBuffer)[3] );
+                                        log_error( "    Actual:   0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultPtr)[0], ((cl_uchar*)resultPtr)[1], ((cl_uchar*)resultPtr)[2], ((cl_uchar*)resultPtr)[3] );
+                                        log_error( "    Error:    %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
+                                        break;
+                                    case CL_UNORM_INT16:
+                                    case CL_SNORM_INT16:
+                                    case CL_UNSIGNED_INT16:
+                                    case CL_SIGNED_INT16:
+#ifdef CL_SFIXED14_APPLE
+                                    case CL_SFIXED14_APPLE:
+#endif
+                                        log_error( "    Expected: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultBuffer)[0], ((cl_ushort*)resultBuffer)[1], ((cl_ushort*)resultBuffer)[2], ((cl_ushort*)resultBuffer)[3] );
+                                        log_error( "    Actual:   0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultPtr)[0], ((cl_ushort*)resultPtr)[1], ((cl_ushort*)resultPtr)[2], ((cl_ushort*)resultPtr)[3] );
+                                        log_error( "    Error:    %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
+                                        break;
+                                    case CL_HALF_FLOAT:
+                                        log_error( "    Expected: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultBuffer)[0], ((cl_ushort*)resultBuffer)[1], ((cl_ushort*)resultBuffer)[2], ((cl_ushort*)resultBuffer)[3] );
+                                        log_error( "    Actual:   0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultPtr)[0], ((cl_ushort*)resultPtr)[1], ((cl_ushort*)resultPtr)[2], ((cl_ushort*)resultPtr)[3] );
+                                        log_error( "    Ulps:     %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
+                                        break;
+                                    case CL_UNSIGNED_INT32:
+                                    case CL_SIGNED_INT32:
+                                        log_error( "    Expected: 0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n", ((cl_uint*)resultBuffer)[0], ((cl_uint*)resultBuffer)[1], ((cl_uint*)resultBuffer)[2], ((cl_uint*)resultBuffer)[3] );
+                                        log_error( "    Actual:   0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n", ((cl_uint*)resultPtr)[0], ((cl_uint*)resultPtr)[1], ((cl_uint*)resultPtr)[2], ((cl_uint*)resultPtr)[3] );
+                                        break;
+                                    case CL_FLOAT:
+                                        log_error( "    Expected: %a %a %a %a\n", ((cl_float*)resultBuffer)[0], ((cl_float*)resultBuffer)[1], ((cl_float*)resultBuffer)[2], ((cl_float*)resultBuffer)[3] );
+                                        log_error( "    Actual:   %a %a %a %a\n", ((cl_float*)resultPtr)[0], ((cl_float*)resultPtr)[1], ((cl_float*)resultPtr)[2], ((cl_float*)resultPtr)[3] );
+                                        log_error( "    Ulps:     %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
+                                        break;
+                                }
+                                
+                                float *v = (float *)(char *)imagePtr;
+                                log_error( "   src: %g %g %g %g\n", v[ 0 ], v[ 1], v[ 2 ], v[ 3 ] );
+                                log_error( "      : %a %a %a %a\n", v[ 0 ], v[ 1], v[ 2 ], v[ 3 ] );
+                                log_error( "   src: %12.24f %12.24f %12.24f %12.24f\n", v[0 ], v[  1], v[ 2 ], v[ 3 ] );
+                                
+                                if( ( --numTries ) == 0 )
+                                    return 1;
+                            }
+                        }
+                    }
+                    imagePtr += get_explicit_type_size( inputType ) * 4;
+                    resultPtr += get_pixel_size( imageInfo->format );
+                }
+            }
+        }
+    }
+    // All done!
+    return totalErrors;
+}
+
+
+// Builds the 3D image-write kernel for inputType and runs test_write_image_3D over the image
+// sizes selected by the global test-mode flags.
+//   device    - queried for 3D image limits and memory sizes; forwarded to every test run
+//   format    - image format under test, stored in the descriptor used for every size
+//   inputType - selects the "i" (kInt), "ui" (kUInt) or "f" (otherwise) suffix for the kernel source
+//   d         - generator state passed to random_log_in_range and forwarded to every test run
+// Returns 0 when every run returned 0, otherwise the first non-zero test_write_image_3D result.
+// Device-query and kernel-build errors go through test_error (presumably an early return).
+int test_write_image_3D_set( cl_device_id device, cl_image_format *format, ExplicitType inputType, MTdata d )
+{
+    char programSrc[10240];
+    const char *ptr;
+    const char *readFormat;
+    clProgramWrapper program;
+    clKernelWrapper kernel;
+    int error;
+    size_t maxWidth, maxHeight, maxDepth;
+    cl_ulong maxAllocSize, memSize;
+    image_descriptor imageInfo = { 0x0 };
+
+    imageInfo.format = format;
+    imageInfo.type = CL_MEM_OBJECT_IMAGE3D;
+
+    // Get our operating parameters
+    error = clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL );
+    error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL );
+    error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof( maxDepth ), &maxDepth, NULL );
+    error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL );
+    error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
+    test_error( error, "Unable to get max image 3D size from device" );
+
+    // Determine the format suffix, then construct and build the source
+    readFormat = ( inputType == kInt ) ? "i" : ( inputType == kUInt ) ? "ui" : "f";
+    sprintf( programSrc, write3DKernelSourcePattern, get_explicit_type_name( inputType ), readFormat );
+    ptr = programSrc;
+    error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "sample_kernel" );
+    test_error( error, "Unable to create testing kernel" );
+
+    // Run tests
+    if( gTestSmallImages )
+    {
+        for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
+        {
+            imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
+            for( imageInfo.height = 1; imageInfo.height < 9; imageInfo.height++ )
+            {
+                imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
+                for( imageInfo.depth = 2; imageInfo.depth < 7; imageInfo.depth++ )
+                {
+                    if( gDebugTrace )
+                        log_info( "   at size %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.depth );
+                    int retCode = test_write_image_3D( device, context, queue, kernel, &imageInfo, inputType, d );
+                    if( retCode )
+                        return retCode;
+                }
+            }
+        }
+    }
+    else if( gTestMaxImages )
+    {
+        // Try a specific set of maximum sizes
+        size_t numberOfSizes;
+        size_t sizes[100][3];
+        get_max_sizes(&numberOfSizes, 100, sizes, maxWidth, maxHeight, maxDepth, 1, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE3D, imageInfo.format);
+
+        for( size_t idx = 0; idx < numberOfSizes; idx++ )
+        {
+            imageInfo.width = sizes[ idx ][ 0 ];
+            imageInfo.height = sizes[ idx ][ 1 ];
+            imageInfo.depth = sizes[ idx ][ 2 ];
+            imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
+            imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
+            log_info("Testing %d x %d x %d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.depth);
+            int retCode = test_write_image_3D( device, context, queue, kernel, &imageInfo, inputType, d );
+            if( retCode )
+                return retCode;
+        }
+    }
+    else if( gTestRounding )
+    {
+        // Single-slice image sized from 2^(channel bits). The shift is done in 64 bits because
+        // shifting a plain int by 32 (4-byte channel types) is undefined behaviour.
+        cl_ulong typeRange = (cl_ulong)1 << ( get_format_type_size( imageInfo.format ) * 8 );
+        imageInfo.height = (size_t)( typeRange / 256 );
+        if( imageInfo.height == 0 )
+            imageInfo.height = 1; // keeps the division below defined
+        imageInfo.width = (size_t)( typeRange / (cl_ulong)imageInfo.height );
+        imageInfo.depth = 1;
+
+        imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
+        imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
+        int retCode = test_write_image_3D( device, context, queue, kernel, &imageInfo, inputType, d );
+        if( retCode )
+            return retCode;
+    }
+    else
+    {
+        for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
+        {
+            cl_ulong size;
+            // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
+            // image, the result array, plus offset arrays, will fit in the global ram space
+            do
+            {
+                imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, d );
+                imageInfo.height = (size_t)random_log_in_range( 16, (int)maxHeight / 32, d );
+                imageInfo.depth = (size_t)random_log_in_range( 16, (int)maxDepth / 32, d );
+                imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
+                imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
+                if( gEnablePitch )
+                {
+                    // Grow the row pitch by extra pixels, then the slice pitch by extra rows
+                    size_t extraWidth = (int)random_log_in_range( 0, 64, d );
+                    imageInfo.rowPitch += extraWidth * get_pixel_size( imageInfo.format );
+                    imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
+                    extraWidth = (int)random_log_in_range( 0, 64, d );
+                    imageInfo.slicePitch += extraWidth * imageInfo.rowPitch;
+                }
+
+                // Multiply in 64 bits so the estimate cannot wrap where size_t is only 32 bits wide
+                size = (cl_ulong)imageInfo.slicePitch * (cl_ulong)imageInfo.depth * 4;
+            } while(  size > maxAllocSize || ( size * 3 ) > memSize );
+
+            if( gDebugTrace )
+                log_info( "   at size %ld,%ld,%ld (pitch %ld, slice %ld) out of %ld,%ld,%ld\n", (long)imageInfo.width, (long)imageInfo.height, (long)imageInfo.depth,
+                         (long)imageInfo.rowPitch, (long)imageInfo.slicePitch, (long)maxWidth, (long)maxHeight, (long)maxDepth );
+            int retCode = test_write_image_3D( device, context, queue, kernel, &imageInfo, inputType, d );
+            if( retCode )
+                return retCode;
+        }
+    }
+
+    return 0;
+}
--- a/test_conformance/images/kernel_read_write/test_write_image.cpp
+++ b/test_conformance/images/kernel_read_write/test_write_image.cpp
@@ -0,0 +1,575 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../testBase.h"
+
+#if !defined(_WIN32)
+#include <sys/mman.h>
+#endif
+
+#define MAX_ERR 0.005f // largest mean per-channel relative error accepted for CL_FLOAT results
+// Command queue, context and option flags used by this test; declared here, defined outside this section.
+extern cl_command_queue queue;
+extern cl_context context;
+extern bool			gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestRounding;
+extern cl_filter_mode	gFilterModeToSkip;
+extern cl_mem_flags gMemFlagsToUse;
+// Write-test entry points for the other image types (presumably implemented in sibling source files).
+extern int test_write_image_1D_set( cl_device_id device, cl_image_format *format, ExplicitType inputType, MTdata d );
+extern int test_write_image_3D_set( cl_device_id device, cl_image_format *format, ExplicitType inputType, MTdata d );
+extern int test_write_image_1D_array_set( cl_device_id device, cl_image_format *format, ExplicitType inputType, MTdata d );
+extern int test_write_image_2D_array_set( cl_device_id device, cl_image_format *format, ExplicitType inputType, MTdata d );
+// Template for the 2D write kernel: the first %s is the element type of the 4-wide input vector, the
+// second completes the write_image call name. Each work-item writes input[ tidY * width + tidX ] to ( tidX, tidY ).
+const char *writeKernelSourcePattern = 
+"__kernel void sample_kernel( __global %s4 *input, write_only image2d_t output )\n"
+"{\n"
+"   int tidX = get_global_id(0), tidY = get_global_id(1);\n"
+"   int offset = tidY*get_image_width(output) + tidX;\n"
+"   write_image%s( output, (int2)( tidX, tidY ), input[ offset ] );\n"
+"}";
+
+int test_write_image( cl_device_id device, cl_context context, cl_command_queue queue, cl_kernel kernel, 
+                     image_descriptor *imageInfo, ExplicitType inputType, MTdata d )
+{
+    int                 totalErrors = 0;
+    const cl_mem_flags  mem_flag_types[2] = {  CL_MEM_WRITE_ONLY,   CL_MEM_READ_WRITE };
+    const char *        mem_flag_names[2] = { "CL_MEM_WRITE_ONLY", "CL_MEM_READ_WRITE" };
+    
+    for( size_t mem_flag_index = 0; mem_flag_index < sizeof( mem_flag_types ) / sizeof( mem_flag_types[0] ); mem_flag_index++ )
+    {
+        int error;
+        size_t threads[2];
+        bool verifyRounding = false;
+        int totalErrors = 0;
+        int forceCorrectlyRoundedWrites = 0;
+        
+#if defined( __APPLE__ )
+        // Require Apple's CPU implementation to be correctly rounded, not just within 0.6
+        cl_device_type type = 0;
+        if( (error = clGetDeviceInfo( device, CL_DEVICE_TYPE, sizeof( type), &type, NULL )))
+        {
+            log_error("Error: Could not get device type for Apple device! (%d) \n", error );
+            return 1;
+        }
+        if( type == CL_DEVICE_TYPE_CPU )
+            forceCorrectlyRoundedWrites = 1;
+#endif
+        
+        if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
+            if( DetectFloatToHalfRoundingMode(queue) )
+                return 1;
+        
+        clMemWrapper inputStream;
+        BufferOwningPtr<char> maxImageUseHostPtrBackingStore, imageValues;
+        
+        create_random_image_data( inputType, imageInfo, imageValues, d );
+        
+        if( inputType == kFloat && imageInfo->format->image_channel_data_type != CL_FLOAT && imageInfo->format->image_channel_data_type != CL_HALF_FLOAT )
+        {
+            // First, fill with arbitrary floats
+            for( size_t y = 0; y < imageInfo->height; y++ )
+            {
+                float *inputValues = (float *)(char*)imageValues + imageInfo->width * y * 4;
+                for( size_t i = 0; i < imageInfo->width * 4; i++ )
+                    inputValues[ i ] = get_random_float( -0.1f, 1.1f, d );
+            }
+            
+            // Throw a few extra test values in there
+            float *inputValues = (float *)(char*)imageValues;
+            size_t i = 0;
+            inputValues[ i++ ] = -0.0000000000009f;
+            inputValues[ i++ ] = 1.f;		
+            inputValues[ i++ ] = -1.f;
+            inputValues[ i++ ] = 2.f;		
+            
+            // Also fill in the first few vectors with some deliberate tests to determine the rounding mode
+            // is correct
+            if( imageInfo->width > 12 )
+            {
+                float formatMax = (float)get_format_max_int( imageInfo->format );
+                inputValues[ i++ ] = 4.0f / formatMax;
+                inputValues[ i++ ] = 4.3f / formatMax;
+                inputValues[ i++ ] = 4.5f / formatMax;
+                inputValues[ i++ ] = 4.7f / formatMax;
+                inputValues[ i++ ] = 5.0f / formatMax;
+                inputValues[ i++ ] = 5.3f / formatMax;
+                inputValues[ i++ ] = 5.5f / formatMax;
+                inputValues[ i++ ] = 5.7f / formatMax;
+                verifyRounding = true;
+            }
+        }
+        else if( inputType == kUInt )
+        {
+            unsigned int *inputValues = (unsigned int*)(char*)imageValues;
+            size_t i = 0;
+            inputValues[ i++ ] = 0;
+            inputValues[ i++ ] = 65535;
+            inputValues[ i++ ] = 7271820;
+            inputValues[ i++ ] = 0;
+        }
+        
+        // Construct testing sources
+        clProtectedImage protImage;
+        clMemWrapper unprotImage;
+        cl_mem image;
+        
+        if( gMemFlagsToUse == CL_MEM_USE_HOST_PTR )
+        {
+            // clProtectedImage uses USE_HOST_PTR, so just rely on that for the testing (via Ian)
+            // Do not use protected images for max image size test since it rounds the row size to a page size
+            if (gTestMaxImages) {
+                create_random_image_data( inputType, imageInfo, maxImageUseHostPtrBackingStore, d );
+                
+                unprotImage = create_image_2d( context, mem_flag_types[mem_flag_index] | CL_MEM_USE_HOST_PTR, imageInfo->format, 
+                                              imageInfo->width, imageInfo->height, 0,
+                                              maxImageUseHostPtrBackingStore, &error );    
+            } else {
+                error = protImage.Create( context, mem_flag_types[mem_flag_index], imageInfo->format, imageInfo->width, imageInfo->height );
+            }
+            if( error != CL_SUCCESS )
+            {
+                log_error( "ERROR: Unable to create 2D image of size %ld x %ld pitch %ld (%s, %s)\n", imageInfo->width, imageInfo->height, 
+                          imageInfo->rowPitch, IGetErrorString( error ), mem_flag_names[mem_flag_index] );
+                return error;
+            }
+            
+            if (gTestMaxImages)
+                image = (cl_mem)unprotImage;
+            else
+                image = (cl_mem)protImage;
+        }
+        else // Either CL_MEM_ALLOC_HOST_PTR, CL_MEM_COPY_HOST_PTR or none
+        {
+            // Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can be accessed by the host, but otherwise
+            // it works just as if no flag is specified, so we just do the same thing either way
+            // Note: if the flags is really CL_MEM_COPY_HOST_PTR, we want to remove it, because we don't want to copy any incoming data
+            unprotImage = create_image_2d( context, mem_flag_types[mem_flag_index] | ( gMemFlagsToUse & ~(CL_MEM_COPY_HOST_PTR) ), imageInfo->format, 
+                                          imageInfo->width, imageInfo->height, 0,
+                                          imageValues, &error );
+            if( error != CL_SUCCESS )
+            {
+                log_error( "ERROR: Unable to create 2D image of size %ld x %ld pitch %ld (%s, %s)\n", imageInfo->width, imageInfo->height, 
+                          imageInfo->rowPitch, IGetErrorString( error ), mem_flag_names[mem_flag_index] );
+                return error;
+            }
+            image = unprotImage;
+        }
+        
+        inputStream = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), 
+                                     get_explicit_type_size( inputType ) * 4 * imageInfo->width * imageInfo->height, imageValues, &error );
+        test_error( error, "Unable to create input buffer" );
+        
+        // Set arguments
+        error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &inputStream );
+        test_error( error, "Unable to set kernel arguments" );
+        error = clSetKernelArg( kernel, 1, sizeof( cl_mem ), &image );
+        test_error( error, "Unable to set kernel arguments" );
+        
+        // Run the kernel
+        threads[0] = (size_t)imageInfo->width;
+        threads[1] = (size_t)imageInfo->height;
+        error = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL );
+        test_error( error, "Unable to run kernel" );
+        
+        // Get results
+        size_t resultSize = imageInfo->rowPitch * imageInfo->height;
+        clProtectedArray PA(resultSize);
+        char *resultValues = (char *)((void *)PA);
+        
+        if( gDebugTrace )
+            log_info( "    reading results, %ld kbytes\n", (unsigned long)( resultSize / 1024 ) );
+        
+        size_t origin[ 3 ] = { 0, 0, 0 };
+        size_t region[ 3 ] = { imageInfo->width, imageInfo->height, 1 };
+        
+        error = clEnqueueReadImage( queue, image, CL_TRUE, origin, region, gEnablePitch ? imageInfo->rowPitch : 0, 0, resultValues, 0, NULL, NULL );
+        test_error( error, "Unable to read results from kernel" );
+        if( gDebugTrace )
+            log_info( "    results read\n" );
+        
+        // Validate results element by element
+        char *imagePtr = imageValues;
+        int numTries = 5;
+        for( size_t y = 0, i = 0; y < imageInfo->height; y++ )
+        {
+            char *resultPtr = (char *)resultValues + y * imageInfo->rowPitch;
+            for( size_t x = 0; x < imageInfo->width; x++, i++ )
+            {
+                char resultBuffer[ 16 ]; // Largest format would be 4 channels * 4 bytes (32 bits) each
+                
+                // Convert this pixel
+                if( inputType == kFloat )
+                    pack_image_pixel( (float *)imagePtr, imageInfo->format, resultBuffer );
+                else if( inputType == kInt )
+                    pack_image_pixel( (int *)imagePtr, imageInfo->format, resultBuffer );
+                else // if( inputType == kUInt )
+                    pack_image_pixel( (unsigned int *)imagePtr, imageInfo->format, resultBuffer );
+                
+                // Compare against the results
+                if( imageInfo->format->image_channel_data_type == CL_FLOAT )
+                {
+                    // Compare floats
+                    float *expected = (float *)resultBuffer;
+                    float *actual = (float *)resultPtr;
+                    float err = 0.f;
+                    for( unsigned int j = 0; j < get_format_channel_count( imageInfo->format ); j++ )
+                        err += ( expected[ j ] != 0 ) ? fabsf( ( expected[ j ] - actual[ j ] ) / expected[ j ] ) : fabsf( expected[ j ] - actual[ j ] );
+                    
+                    err /= (float)get_format_channel_count( imageInfo->format );
+                    if( err > MAX_ERR )
+                    {
+                        unsigned int *e = (unsigned int *)expected;
+                        unsigned int *a = (unsigned int *)actual;
+                        log_error( "ERROR: Sample %ld (%ld,%ld) did not validate! (%s)\n", i, x, y, mem_flag_names[mem_flag_index] );
+                        log_error( "       Error: %g\n", err );
+                        log_error( "       Expected: %a %a %a %a\n", expected[ 0 ], expected[ 1 ], expected[ 2 ], expected[ 3 ] );
+                        log_error( "       Expected: %08x %08x %08x %08x\n", e[ 0 ], e[ 1 ], e[ 2 ], e[ 3 ] );
+                        log_error( "       Actual:   %a %a %a %a\n", actual[ 0 ], actual[ 1 ], actual[ 2 ], actual[ 3 ] );
+                        log_error( "       Actual:   %08x %08x %08x %08x\n", a[ 0 ], a[ 1 ], a[ 2 ], a[ 3 ] );
+                        totalErrors++;
+                        if( ( --numTries ) == 0 )
+                            return 1;
+                    }
+                }
+                else if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
+                {
+                
+                    // Compare half floats
+                    if( memcmp( resultBuffer, resultPtr, 2 * get_format_channel_count( imageInfo->format ) ) != 0 )
+                    {
+                        cl_ushort *e = (cl_ushort *)resultBuffer;
+                        cl_ushort *a = (cl_ushort *)resultPtr;
+                        int err_cnt = 0;
+
+                        //Fix up cases where we have NaNs
+                        for( size_t j = 0; j < get_format_channel_count( imageInfo->format ); j++ )
+                        {
+                            if( is_half_nan( e[j] ) && is_half_nan(a[j]) )
+                                continue;
+                            if( e[j] != a[j] )
+                                err_cnt++;
+                        }
+
+                        if( err_cnt )
+                        {
+                            totalErrors++;
+                            log_error( "ERROR: Sample %ld (%ld,%ld) did not validate! (%s)\n", i, x, y, mem_flag_names[mem_flag_index] );
+                            log_error( "    Expected: 0x%04x 0x%04x 0x%04x 0x%04x\n", e[0], e[1], e[2], e[3] );
+                            log_error( "    Actual:   0x%04x 0x%04x 0x%04x 0x%04x\n", a[0], a[1], a[2], a[3] );
+                            if( inputType == kFloat )
+                            {
+                                float *p = (float *)(char *)imagePtr;
+                                log_error( "    Source: %a %a %a %a\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] );
+                                log_error( "          : %12.24f %12.24f %12.24f %12.24f\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] );
+                            }
+                            if( ( --numTries ) == 0 )
+                                return 1;
+                        }
+                    }
+                }
+                else
+                {
+                    // Exact result passes every time
+                    if( memcmp( resultBuffer, resultPtr, get_pixel_size( imageInfo->format ) ) != 0 )
+                    { 
+                        // result is inexact.  Calculate error
+                        int failure = 1;
+                        float errors[4] = {NAN, NAN, NAN, NAN};
+                        pack_image_pixel_error( (float *)imagePtr, imageInfo->format, resultBuffer, errors );
+                        
+                        // We are allowed 0.6 absolute error vs. infinitely precise for some normalized formats
+                        if( 0 == forceCorrectlyRoundedWrites    &&
+                           (
+                            imageInfo->format->image_channel_data_type == CL_UNORM_INT8 ||
+                            imageInfo->format->image_channel_data_type == CL_UNORM_INT_101010 ||
+                            imageInfo->format->image_channel_data_type == CL_UNORM_INT16 ||
+                            imageInfo->format->image_channel_data_type == CL_SNORM_INT8 ||
+                            imageInfo->format->image_channel_data_type == CL_SNORM_INT16 
+                            ))
+                        {
+                            if( ! (fabsf( errors[0] ) > 0.6f) && ! (fabsf( errors[1] ) > 0.6f) &&
+                               ! (fabsf( errors[2] ) > 0.6f) && ! (fabsf( errors[3] ) > 0.6f)  )
+                                failure = 0;
+                        }
+                        
+                        
+                        if( failure )
+                        {
+                            totalErrors++;
+                            // Is it our special rounding test?
+                            if( verifyRounding && i >= 1 && i <= 2 )
+                            {
+                                // Try to guess what the rounding mode of the device really is based on what it returned
+                                const char *deviceRounding = "unknown";
+                                unsigned int deviceResults[8];
+                                read_image_pixel<unsigned int>( resultPtr, imageInfo, 0, 0, 0, deviceResults );
+                                read_image_pixel<unsigned int>( resultPtr, imageInfo, 1, 0, 0, &deviceResults[ 4 ] );
+                                
+                                if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 4 && deviceResults[ 3 ] == 4 &&
+                                   deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 5 && deviceResults[ 7 ] == 5 )
+                                    deviceRounding = "truncate";
+                                else if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 5 && deviceResults[ 3 ] == 5 &&
+                                        deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 6 && deviceResults[ 7 ] == 6 )
+                                    deviceRounding = "round to nearest";
+                                else if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 4 && deviceResults[ 3 ] == 5 &&
+                                        deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 6 && deviceResults[ 7 ] == 6 )
+                                    deviceRounding = "round to even";
+                                
+                                log_error( "ERROR: Rounding mode sample (%ld) did not validate, probably due to the device's rounding mode being wrong (%s)\n", i, mem_flag_names[mem_flag_index] );
+                                log_error( "       Actual values rounded by device: %x %x %x %x %x %x %x %x\n", deviceResults[ 0 ], deviceResults[ 1 ], deviceResults[ 2 ], deviceResults[ 3 ], 
+                                          deviceResults[ 4 ], deviceResults[ 5 ], deviceResults[ 6 ], deviceResults[ 7 ] );	
+                                log_error( "       Rounding mode of device appears to be %s\n", deviceRounding );
+                                return 1;
+                            }
+                            log_error( "ERROR: Sample %d (%d,%d) did not validate!\n", (int)i, (int)x, (int)y );
+                            switch(imageInfo->format->image_channel_data_type)
+                            {
+                                case CL_UNORM_INT8:
+                                case CL_SNORM_INT8:
+                                case CL_UNSIGNED_INT8:
+                                case CL_SIGNED_INT8:
+                                case CL_UNORM_INT_101010:
+                                    log_error( "    Expected: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultBuffer)[0], ((cl_uchar*)resultBuffer)[1], ((cl_uchar*)resultBuffer)[2], ((cl_uchar*)resultBuffer)[3] );
+                                    log_error( "    Actual:   0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultPtr)[0], ((cl_uchar*)resultPtr)[1], ((cl_uchar*)resultPtr)[2], ((cl_uchar*)resultPtr)[3] );
+                                    log_error( "    Error:    %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
+                                    break;
+                                case CL_UNORM_INT16:
+                                case CL_SNORM_INT16:
+                                case CL_UNSIGNED_INT16:
+                                case CL_SIGNED_INT16:
+#ifdef CL_SFIXED14_APPLE
+                                case CL_SFIXED14_APPLE:
+#endif
+                                    log_error( "    Expected: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultBuffer)[0], ((cl_ushort*)resultBuffer)[1], ((cl_ushort*)resultBuffer)[2], ((cl_ushort*)resultBuffer)[3] );
+                                    log_error( "    Actual:   0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultPtr)[0], ((cl_ushort*)resultPtr)[1], ((cl_ushort*)resultPtr)[2], ((cl_ushort*)resultPtr)[3] );
+                                    log_error( "    Error:    %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
+                                    break;
+                                case CL_HALF_FLOAT:
+                                    log_error( "    Expected: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultBuffer)[0], ((cl_ushort*)resultBuffer)[1], ((cl_ushort*)resultBuffer)[2], ((cl_ushort*)resultBuffer)[3] );
+                                    log_error( "    Actual:   0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultPtr)[0], ((cl_ushort*)resultPtr)[1], ((cl_ushort*)resultPtr)[2], ((cl_ushort*)resultPtr)[3] );
+                                    log_error( "    Ulps:     %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
+                                    break;
+                                case CL_UNSIGNED_INT32:
+                                case CL_SIGNED_INT32:
+                                    log_error( "    Expected: 0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n", ((cl_uint*)resultBuffer)[0], ((cl_uint*)resultBuffer)[1], ((cl_uint*)resultBuffer)[2], ((cl_uint*)resultBuffer)[3] );
+                                    log_error( "    Actual:   0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n", ((cl_uint*)resultPtr)[0], ((cl_uint*)resultPtr)[1], ((cl_uint*)resultPtr)[2], ((cl_uint*)resultPtr)[3] );
+                                    break;
+                                case CL_FLOAT:
+                                    log_error( "    Expected: %a %a %a %a\n", ((cl_float*)resultBuffer)[0], ((cl_float*)resultBuffer)[1], ((cl_float*)resultBuffer)[2], ((cl_float*)resultBuffer)[3] );
+                                    log_error( "    Actual:   %a %a %a %a\n", ((cl_float*)resultPtr)[0], ((cl_float*)resultPtr)[1], ((cl_float*)resultPtr)[2], ((cl_float*)resultPtr)[3] );
+                                    log_error( "    Ulps:     %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
+                                    break;
+                            }
+                            
+                            float *v = (float *)(char *)imagePtr;
+                            log_error( "   src: %g %g %g %g\n", v[ 0 ], v[ 1], v[ 2 ], v[ 3 ] );
+                            log_error( "      : %a %a %a %a\n", v[ 0 ], v[ 1], v[ 2 ], v[ 3 ] );
+                            log_error( "   src: %12.24f %12.24f %12.24f %12.24f\n", v[0 ], v[  1], v[ 2 ], v[ 3 ] );
+                            
+                            if( ( --numTries ) == 0 )
+                                return 1;
+                        }
+                    }
+                }
+                imagePtr += get_explicit_type_size( inputType ) * 4;
+                resultPtr += get_pixel_size( imageInfo->format );
+            }
+        }
+    }
+    
+    // All done!
+    return totalErrors;
+}
+
+
+// Runs the 2D write_image test for one image format over a set of image sizes.
+//
+// Builds the "sample_kernel" kernel from writeKernelSourcePattern for the given input type and
+// then calls test_write_image() once per image size. Which sizes are tested depends on the
+// global mode flags, checked in this order: gTestSmallImages, gTestMaxImages, gTestRounding;
+// when none is set, NUM_IMAGE_ITERATIONS randomly sized images are tested.
+//
+//   device    - device queried for its 2D image and memory limits
+//   format    - image format under test
+//   inputType - kInt or kUInt; any other value is treated as kFloat
+//   d         - random number generator state used for sizes and passed on to test_write_image()
+//
+// Returns 0 when every size passes, otherwise the first non-zero code returned by
+// test_write_image(). Failures while querying the device or building the kernel are handled
+// by test_error().
+int test_write_image_set( cl_device_id device, cl_image_format *format, ExplicitType inputType, MTdata d )
+{
+    char programSrc[10240];
+    const char *ptr;
+    const char *readFormat;
+    clProgramWrapper program;
+    clKernelWrapper kernel;
+
+    int error;
+
+
+    // Get our operating parameters
+    size_t maxWidth, maxHeight;
+    cl_ulong maxAllocSize, memSize;
+
+    image_descriptor imageInfo = { 0x0 };
+
+    imageInfo.format = format;
+    imageInfo.slicePitch = imageInfo.arraySize = imageInfo.depth = 0;
+    imageInfo.type = CL_MEM_OBJECT_IMAGE2D;
+
+    error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL );
+    error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL );
+    error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL );
+    error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
+    test_error( error, "Unable to get max image 2D size from device" );
+
+    // Determine types
+    if( inputType == kInt )
+        readFormat = "i";
+    else if( inputType == kUInt )
+        readFormat = "ui";
+    else // kFloat
+        readFormat = "f";
+
+    // Construct the source
+    sprintf( programSrc, writeKernelSourcePattern, get_explicit_type_name( inputType ), readFormat );
+
+    ptr = programSrc;
+    error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "sample_kernel" );
+    test_error( error, "Unable to create testing kernel" );
+
+    // Run tests
+    if( gTestSmallImages )
+    {
+        // Every width in [1,12] combined with every height in [1,8]
+        for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
+        {
+            imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
+            for( imageInfo.height = 1; imageInfo.height < 9; imageInfo.height++ )
+            {
+                if( gDebugTrace )
+                    log_info( "   at size %d,%d\n", (int)imageInfo.width, (int)imageInfo.height );
+                int retCode = test_write_image( device, context, queue, kernel, &imageInfo, inputType, d );
+                if( retCode )
+                    return retCode;
+            }
+        }
+    }
+    else if( gTestMaxImages )
+    {
+        // Try a specific set of maximum sizes
+        size_t numberOfSizes;
+        size_t sizes[100][3];
+
+        get_max_sizes(&numberOfSizes, 100, sizes, maxWidth, maxHeight, 1, 1, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE2D, imageInfo.format);
+
+        for( size_t idx = 0; idx < numberOfSizes; idx++ )
+        {
+            imageInfo.width = sizes[ idx ][ 0 ];
+            imageInfo.height = sizes[ idx ][ 1 ];
+            imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
+            log_info("Testing %d x %d\n", (int)imageInfo.width, (int)imageInfo.height);
+            int retCode = test_write_image( device, context, queue, kernel, &imageInfo, inputType, d );
+            if( retCode )
+                return retCode;
+        }
+    }
+    else if( gTestRounding )
+    {
+        // One pixel per representable channel value, laid out 256 pixels wide.
+        // The shift is done on a 64-bit one: with a 4-byte channel type the shift count is 32,
+        // which is undefined on a 32-bit int (and would typically give a height of 0, followed
+        // by a division by zero below).
+        cl_ulong typeRange = (cl_ulong)1 << ( get_format_type_size( imageInfo.format ) * 8 );
+        imageInfo.height = (size_t)( typeRange / 256 );
+        imageInfo.width = (size_t)( typeRange / (cl_ulong)imageInfo.height );
+
+        imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
+        int retCode = test_write_image( device, context, queue, kernel, &imageInfo, inputType, d );
+        if( retCode )
+            return retCode;
+    }
+    else
+    {
+        for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
+        {
+            cl_ulong size;
+            // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
+            // image, the result array, plus offset arrays, will fit in the global ram space
+            do
+            {
+                imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, d );
+                imageInfo.height = (size_t)random_log_in_range( 16, (int)maxHeight / 32, d );
+
+                imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
+                if( gEnablePitch )
+                {
+                    size_t extraWidth = (int)random_log_in_range( 0, 64, d );
+                    imageInfo.rowPitch += extraWidth * get_pixel_size( imageInfo.format );
+                }
+
+                // Multiply in 64 bits so the product cannot wrap where size_t is 32 bits wide
+                // before it is compared against the device limits.
+                size = (cl_ulong)imageInfo.rowPitch * (cl_ulong)imageInfo.height * 4;
+            } while(  size > maxAllocSize || ( size * 3 ) > memSize );
+
+            if( gDebugTrace )
+                log_info( "   at size %d,%d (pitch %d) out of %d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.rowPitch, (int)maxWidth, (int)maxHeight );
+
+            int retCode = test_write_image( device, context, queue, kernel, &imageInfo, inputType, d );
+            if( retCode )
+                return retCode;
+        }
+    }
+
+    return 0;
+}
+
+// Runs the write_image test set for every format in formatList that is not filtered out.
+//
+//   device       - device to test on
+//   formatList   - array of numFormats image formats
+//   filterFlags  - parallel array; a true entry means the matching format is skipped
+//   numFormats   - number of entries in formatList / filterFlags
+//   imageSampler - only its filter_mode is read: nothing is run for CL_FILTER_LINEAR
+//   inputType    - data type handed to the per-image-type test set
+//   imageType    - selects which per-image-type test set is called
+//
+// Increments gTestCount for each format tested and gTestFailure for each format whose test
+// set returned non-zero. Returns the sum of the per-format return codes (0 if all passed).
+int test_write_image_formats( cl_device_id device, cl_image_format *formatList, bool *filterFlags, unsigned int numFormats,
+                             image_sampler_data *imageSampler, ExplicitType inputType, cl_mem_object_type imageType )
+{
+    if( imageSampler->filter_mode == CL_FILTER_LINEAR )
+        // No need to run for linear filters
+        return 0;
+
+    int ret = 0;
+
+    log_info( "write_image (%s input) *****************************\n", get_explicit_type_name( inputType ) );
+
+
+    RandomSeed seed( gRandomSeed );
+
+    for( unsigned int i = 0; i < numFormats; i++ )
+    {
+        if( filterFlags[ i ] )
+            continue;
+
+        gTestCount++;
+        cl_image_format &imageFormat = formatList[ i ];
+
+        print_write_header( &imageFormat, false );
+        int retCode;
+        switch (imageType)
+        {
+            case CL_MEM_OBJECT_IMAGE1D:
+                retCode = test_write_image_1D_set( device, &imageFormat, inputType, seed );
+                break;
+            case CL_MEM_OBJECT_IMAGE2D:
+                retCode = test_write_image_set( device, &imageFormat, inputType, seed );
+                break;
+            case CL_MEM_OBJECT_IMAGE3D:
+                retCode = test_write_image_3D_set( device, &imageFormat, inputType, seed );
+                break;
+            case CL_MEM_OBJECT_IMAGE1D_ARRAY:
+                retCode = test_write_image_1D_array_set( device, &imageFormat, inputType, seed );
+                break;
+            case CL_MEM_OBJECT_IMAGE2D_ARRAY:
+                retCode = test_write_image_2D_array_set( device, &imageFormat, inputType, seed );
+                break;
+            default:
+                // Without this case retCode would be read uninitialized below. Report an
+                // unhandled image type as a failure instead of relying on garbage.
+                log_error( "ERROR: Unsupported image type 0x%x\n", (unsigned int)imageType );
+                retCode = -1;
+                break;
+        }
+
+        if( retCode != 0 )
+        {
+            gTestFailure++;
+            log_error( "FAILED: " );
+            print_write_header( &imageFormat, true );
+            log_info( "\n" );
+        }
+        ret += retCode;
+    }
+    return ret;
+}
+
+