Initial open source release of OpenCL 2.0 CTS.

2026-03-23 15:39:03 +00:00 · 2017-05-16 18:50:35 +05:30
parent 6911ba5116
commit 3a440d17c8
883 changed files with 318212 additions and 0 deletions
--- a/test_conformance/compatibility/CMakeLists.txt
+++ b/test_conformance/compatibility/CMakeLists.txt
@@ -0,0 +1 @@
+add_subdirectory(test_conformance)
--- a/test_conformance/compatibility/test_common/Makefile
+++ b/test_conformance/compatibility/test_common/Makefile
@@ -0,0 +1,26 @@
+
+PRODUCTS = harness/\
+
+# utils/
+ 
+TOP=$(shell pwd)
+
+all: $(PRODUCTS)
+
+# Run `make clean` inside the directory of every entry in $(PRODUCTS),
+# echoing a banner with the directory name before each one.
+clean:
+	@for testdir in $(dir $(PRODUCTS))  ; \
+		do ( \
+			echo "==================================================================================" ; \
+			echo "Cleaning $$testdir" ; \
+			echo "==================================================================================" ; \
+			cd $$testdir && make clean \
+			); \
+		done \
+
+# Build one product: echo a timestamped banner, then run `make` in the
+# product's directory.
+$(PRODUCTS): 
+	@echo "==================================================================================" ;
+	@echo "(`date "+%H:%M:%S"`) Make $@" ;
+	@echo "==================================================================================" ;
+	cd $(dir $@) && make
+
+.PHONY: clean $(PRODUCTS)  all
--- a/test_conformance/compatibility/test_common/gl/gl_headers.h
+++ b/test_conformance/compatibility/test_common/gl/gl_headers.h
@@ -0,0 +1,52 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef _gl_headers_h
+#define _gl_headers_h
+
+#if defined( __APPLE__ )
+    #include <OpenGL/OpenGL.h>
+#if defined(CGL_VERSION_1_3)
+    #include <OpenGL/gl3.h>
+    #include <OpenGL/gl3ext.h>
+#else
+    #include <OpenGL/gl.h>
+    #include <OpenGL/glext.h>
+#endif
+    #include <GLUT/glut.h>
+#else
+#ifdef _WIN32
+    #include <windows.h>
+#endif
+     #include <GL/glew.h>
+    #include <GL/gl.h>
+     #include <GL/glext.h>
+#ifdef _WIN32
+    #include <GL/glut.h>
+#else
+    #include <GL/freeglut.h>
+#endif
+
+#endif
+
+#ifdef _WIN32
+    GLboolean gluCheckExtension(const GLubyte *extName, const GLubyte *extString);
+    // No glutGetProcAddress in the standard glut v3.7.
+    #define glutGetProcAddress(procName) wglGetProcAddress(procName)
+#endif
+
+
+#endif    // _gl_headers_h
+
--- a/test_conformance/compatibility/test_common/gl/helpers.cpp
+++ b/test_conformance/compatibility/test_common/gl/helpers.cpp
--- a/test_conformance/compatibility/test_common/gl/helpers.h
+++ b/test_conformance/compatibility/test_common/gl/helpers.h
@@ -0,0 +1,283 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef _helpers_h
+#define _helpers_h
+
+#include "../harness/compat.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#if !defined (__APPLE__)
+#include <CL/cl.h>
+#include "gl_headers.h"
+#include <CL/cl_gl.h>
+#else
+#include "gl_headers.h"
+#endif
+
+#include "../../test_common/harness/errorHelpers.h"
+#include "../../test_common/harness/kernelHelpers.h"
+#include "../../test_common/harness/threadTesting.h"
+#include "../../test_common/harness/typeWrappers.h"
+#include "../../test_common/harness/conversions.h"
+#include "../../test_common/harness/mt19937.h"
+
+// Function-pointer type for the clCreateFromGLBuffer_ptr entry point declared
+// below. errcode_ret is spelled cl_int * so that it agrees with the sibling
+// clCreateFromGL*_fn typedefs in this header.
+typedef cl_mem
+(CL_API_CALL *clCreateFromGLBuffer_fn)(cl_context     context,
+                          cl_mem_flags   flags,
+                          GLuint         bufobj,
+                          cl_int *       errcode_ret);
+
+// Function-pointer type for the clCreateFromGLTexture_ptr entry point declared below.
+typedef cl_mem
+(CL_API_CALL *clCreateFromGLTexture_fn)(cl_context       context ,
+                        cl_mem_flags     flags ,
+                        GLenum           target ,
+                        GLint            miplevel ,
+                        GLuint           texture ,
+                        cl_int *         errcode_ret) ;
+
+// Function-pointer type for clCreateFromGLTexture2D_ptr; same parameter list as
+// clCreateFromGLTexture_fn.
+typedef cl_mem
+(CL_API_CALL *clCreateFromGLTexture2D_fn)(cl_context       context ,
+                        cl_mem_flags     flags ,
+                        GLenum           target ,
+                        GLint            miplevel ,
+                        GLuint           texture ,
+                        cl_int *         errcode_ret) ;
+
+// Function-pointer type for clCreateFromGLTexture3D_ptr; same parameter list as
+// clCreateFromGLTexture_fn.
+typedef cl_mem
+(CL_API_CALL *clCreateFromGLTexture3D_fn)(cl_context       context ,
+                        cl_mem_flags     flags ,
+                        GLenum           target ,
+                        GLint            miplevel ,
+                        GLuint           texture ,
+                        cl_int *         errcode_ret) ;
+
+// Function-pointer type for clCreateFromGLRenderbuffer_ptr.
+typedef cl_mem
+(CL_API_CALL *clCreateFromGLRenderbuffer_fn)(cl_context    context ,
+                           cl_mem_flags  flags ,
+                           GLuint        renderbuffer ,
+                           cl_int *      errcode_ret) ;
+
+// Function-pointer type for clGetGLObjectInfo_ptr; both trailing arguments are
+// output pointers.
+typedef cl_int
+(CL_API_CALL *clGetGLObjectInfo_fn)(cl_mem                 memobj ,
+                  cl_gl_object_type *    gl_object_type ,
+                  GLuint *               gl_object_name) ;
+
+// Function-pointer type for clGetGLTextureInfo_ptr.
+typedef cl_int
+(CL_API_CALL *clGetGLTextureInfo_fn)(cl_mem                memobj ,
+                   cl_gl_texture_info    param_name ,
+                   size_t                param_value_size ,
+                   void *                param_value ,
+                   size_t *              param_value_size_ret) ;
+
+// Function-pointer type for clEnqueueAcquireGLObjects_ptr.
+typedef cl_int
+(CL_API_CALL *clEnqueueAcquireGLObjects_fn)(cl_command_queue       command_queue ,
+                          cl_uint                num_objects ,
+                          const cl_mem *         mem_objects ,
+                          cl_uint                num_events_in_wait_list ,
+                          const cl_event *       event_wait_list ,
+                                cl_event *             event) ;
+
+// Function-pointer type for clEnqueueReleaseGLObjects_ptr; same parameter list
+// as clEnqueueAcquireGLObjects_fn.
+typedef cl_int
+(CL_API_CALL *clEnqueueReleaseGLObjects_fn)(cl_command_queue       command_queue ,
+                          cl_uint                num_objects ,
+                          const cl_mem *         mem_objects ,
+                          cl_uint                num_events_in_wait_list ,
+                          const cl_event *       event_wait_list ,
+                                cl_event *             event) ;
+
+
+extern clCreateFromGLBuffer_fn clCreateFromGLBuffer_ptr;
+extern clCreateFromGLTexture_fn clCreateFromGLTexture_ptr;
+extern clCreateFromGLTexture2D_fn clCreateFromGLTexture2D_ptr;
+extern clCreateFromGLTexture3D_fn clCreateFromGLTexture3D_ptr;
+extern clCreateFromGLRenderbuffer_fn clCreateFromGLRenderbuffer_ptr;
+extern clGetGLObjectInfo_fn clGetGLObjectInfo_ptr;
+extern clGetGLTextureInfo_fn clGetGLTextureInfo_ptr;
+extern clEnqueueAcquireGLObjects_fn clEnqueueAcquireGLObjects_ptr;
+extern clEnqueueReleaseGLObjects_fn clEnqueueReleaseGLObjects_ptr;
+
+
+// Owning holder for an OpenGL buffer object name.
+//
+// The destructor deletes the held name with glDeleteBuffers unless it is 0.
+// Assigning a new GLuint replaces the held name without deleting the old one,
+// and no copy constructor is declared, so copies share (and would each
+// delete) the same name.
+class glBufferWrapper
+{
+    public:
+        // Start out holding no buffer.
+        glBufferWrapper()
+            : mBuffer( 0 )
+        {
+        }
+
+        // Hold an already generated buffer name.
+        glBufferWrapper( GLuint b )
+            : mBuffer( b )
+        {
+        }
+
+        // Delete the held buffer, if any.
+        ~glBufferWrapper()
+        {
+            if( mBuffer == 0 )
+                return;
+            glDeleteBuffers( 1, &mBuffer );
+        }
+
+        // Replace the held name; the previous name is not deleted.
+        glBufferWrapper & operator=( const GLuint &rhs )
+        {
+            mBuffer = rhs;
+            return *this;
+        }
+
+        // Read access to the held name.
+        operator GLuint()
+        {
+            return mBuffer;
+        }
+
+        // Pointer access to the held name, e.g. for glGen* style out-parameters.
+        operator GLuint *()
+        {
+            return &mBuffer;
+        }
+
+        // Address-of yields a pointer to the held name, not to the wrapper.
+        GLuint * operator&()
+        {
+            return &mBuffer;
+        }
+
+        // Compare the held name against a raw GL name.
+        bool operator==( GLuint rhs )
+        {
+            return mBuffer == rhs;
+        }
+
+    protected:
+
+        // The buffer object name; 0 means "nothing held".
+        GLuint mBuffer;
+};
+
+// Owning holder for an OpenGL texture name.
+//
+// The destructor deletes the held name with glDeleteTextures unless it is 0.
+// Assigning a new GLuint replaces the held name without deleting the old one,
+// and no copy constructor is declared, so copies share (and would each
+// delete) the same name.
+class glTextureWrapper
+{
+    public:
+        // Start out holding no texture.
+        glTextureWrapper()
+            : mHandle( 0 )
+        {
+        }
+
+        // Hold an already generated texture name.
+        glTextureWrapper( GLuint b )
+            : mHandle( b )
+        {
+        }
+
+        // Delete the held texture, if any.
+        ~glTextureWrapper()
+        {
+            if( mHandle == 0 )
+                return;
+            glDeleteTextures( 1, &mHandle );
+        }
+
+        // Replace the held name; the previous name is not deleted.
+        glTextureWrapper & operator=( const GLuint &rhs )
+        {
+            mHandle = rhs;
+            return *this;
+        }
+
+        // Read access to the held name.
+        operator GLuint()
+        {
+            return mHandle;
+        }
+
+        // Pointer access to the held name, e.g. for glGen* style out-parameters.
+        operator GLuint *()
+        {
+            return &mHandle;
+        }
+
+        // Address-of yields a pointer to the held name, not to the wrapper.
+        GLuint * operator&()
+        {
+            return &mHandle;
+        }
+
+        // Compare the held name against a raw GL name.
+        bool operator==( GLuint rhs )
+        {
+            return mHandle == rhs;
+        }
+
+    protected:
+
+        // The texture handle; 0 means "nothing held".
+        GLuint mHandle;
+};
+
+// Owning holder for an OpenGL renderbuffer name.
+//
+// The destructor deletes the held name with glDeleteRenderbuffersEXT unless it
+// is 0. Assigning a new GLuint replaces the held name without deleting the old
+// one, and no copy constructor is declared, so copies share (and would each
+// delete) the same name.
+class glRenderbufferWrapper
+{
+    public:
+        // Start out holding no renderbuffer.
+        glRenderbufferWrapper()
+            : mBuffer( 0 )
+        {
+        }
+
+        // Hold an already generated renderbuffer name.
+        glRenderbufferWrapper( GLuint b )
+            : mBuffer( b )
+        {
+        }
+
+        // Delete the held renderbuffer, if any.
+        ~glRenderbufferWrapper()
+        {
+            if( mBuffer == 0 )
+                return;
+            glDeleteRenderbuffersEXT( 1, &mBuffer );
+        }
+
+        // Replace the held name; the previous name is not deleted.
+        glRenderbufferWrapper & operator=( const GLuint &rhs )
+        {
+            mBuffer = rhs;
+            return *this;
+        }
+
+        // Read access to the held name.
+        operator GLuint()
+        {
+            return mBuffer;
+        }
+
+        // Pointer access to the held name, e.g. for glGen* style out-parameters.
+        operator GLuint *()
+        {
+            return &mBuffer;
+        }
+
+        // Address-of yields a pointer to the held name, not to the wrapper.
+        GLuint * operator&()
+        {
+            return &mBuffer;
+        }
+
+        // Compare the held name against a raw GL name.
+        bool operator==( GLuint rhs )
+        {
+            return mBuffer == rhs;
+        }
+
+    protected:
+
+        // The renderbuffer name; 0 means "nothing held".
+        GLuint mBuffer;
+};
+
+// Owning holder for an OpenGL framebuffer name.
+//
+// The destructor deletes the held name with glDeleteFramebuffersEXT unless it
+// is 0. Assigning a new GLuint replaces the held name without deleting the old
+// one, and no copy constructor is declared, so copies share (and would each
+// delete) the same name.
+class glFramebufferWrapper
+{
+    public:
+        // Start out holding no framebuffer.
+        glFramebufferWrapper()
+            : mBuffer( 0 )
+        {
+        }
+
+        // Hold an already generated framebuffer name.
+        glFramebufferWrapper( GLuint b )
+            : mBuffer( b )
+        {
+        }
+
+        // Delete the held framebuffer, if any.
+        ~glFramebufferWrapper()
+        {
+            if( mBuffer == 0 )
+                return;
+            glDeleteFramebuffersEXT( 1, &mBuffer );
+        }
+
+        // Replace the held name; the previous name is not deleted.
+        glFramebufferWrapper & operator=( const GLuint &rhs )
+        {
+            mBuffer = rhs;
+            return *this;
+        }
+
+        // Read access to the held name.
+        operator GLuint()
+        {
+            return mBuffer;
+        }
+
+        // Pointer access to the held name, e.g. for glGen* style out-parameters.
+        operator GLuint *()
+        {
+            return &mBuffer;
+        }
+
+        // Address-of yields a pointer to the held name, not to the wrapper.
+        GLuint * operator&()
+        {
+            return &mBuffer;
+        }
+
+        // Compare the held name against a raw GL name.
+        bool operator==( GLuint rhs )
+        {
+            return mBuffer == rhs;
+        }
+
+    protected:
+
+        // The framebuffer name; 0 means "nothing held".
+        GLuint mBuffer;
+};
+
+
+// Helper functions (defined in helpers.cpp)
+
+extern void * CreateGLTexture1DArray( size_t width, size_t length,
+  GLenum target, GLenum glFormat, GLenum internalFormat, GLenum glType,
+  ExplicitType type, GLuint *outTextureID, int *outError,
+  bool allocateMem, MTdata d);
+
+extern void * CreateGLTexture2DArray( size_t width, size_t height, size_t length,
+  GLenum target, GLenum glFormat, GLenum internalFormat, GLenum glType,
+  ExplicitType type, GLuint *outTextureID, int *outError,
+  bool allocateMem, MTdata d);
+
+extern void * CreateGLTextureBuffer( size_t width,
+  GLenum target, GLenum glFormat, GLenum internalFormat, GLenum glType,
+  ExplicitType type, GLuint *outTex, GLuint *outBuf, int *outError,
+  bool allocateMem, MTdata d);
+
+extern void * CreateGLTexture1D(size_t width,
+                                GLenum target, GLenum glFormat,
+                                GLenum internalFormat, GLenum glType,
+                                ExplicitType type, GLuint *outTextureID,
+                                int *outError, bool allocateMem, MTdata d );
+
+extern void * CreateGLTexture2D( size_t width, size_t height,
+                               GLenum target, GLenum glFormat,
+                               GLenum internalFormat, GLenum glType,
+                               ExplicitType type, GLuint *outTextureID,
+                               int *outError, bool allocateMem, MTdata d );
+
+
+// Note the argument order: unlike the other CreateGLTexture* helpers declared
+// in this header (which end in "bool allocateMem, MTdata d"), this one takes
+// the MTdata first and allocateMem last, where it defaults to true.
+extern void * CreateGLTexture3D( size_t width, size_t height, size_t depth,
+                                 GLenum target, GLenum glFormat,
+                                 GLenum internalFormat, GLenum glType,
+                                 ExplicitType type, GLuint *outTextureID,
+                                 int *outError, MTdata d, bool allocateMem = true );
+
+extern void * ReadGLTexture( GLenum glTarget, GLuint glTexture, GLuint glBuf, GLint width,
+                             GLenum glFormat, GLenum glInternalFormat,
+                             GLenum glType, ExplicitType typeToReadAs,
+                             size_t outWidth, size_t outHeight );
+
+extern int CreateGLRenderbufferRaw( GLsizei width, GLsizei height,
+                                   GLenum target, GLenum glFormat,
+                                   GLenum internalFormat, GLenum glType,
+                                   GLuint *outFramebuffer,
+                                   GLuint *outRenderbuffer );
+
+extern void * CreateGLRenderbuffer( GLsizei width, GLsizei height,
+                                    GLenum target, GLenum glFormat,
+                                    GLenum internalFormat, GLenum glType,
+                                    ExplicitType type,
+                                    GLuint *outFramebuffer,
+                                    GLuint *outRenderbuffer,
+                                    int *outError, MTdata d, bool allocateMem );
+
+extern void * ReadGLRenderbuffer( GLuint glFramebuffer, GLuint glRenderbuffer,
+                                  GLenum attachment, GLenum glFormat,
+                                  GLenum glInternalFormat, GLenum glType,
+                                  ExplicitType typeToReadAs,
+                                  size_t outWidth, size_t outHeight );
+
+extern void DumpGLBuffer(GLenum type, size_t width, size_t height, void* buffer);
+extern const char *GetGLTypeName( GLenum type );
+extern const char *GetGLAttachmentName( GLenum att );
+extern const char *GetGLTargetName( GLenum tgt );
+extern const char *GetGLBaseFormatName( GLenum baseformat );
+extern const char *GetGLFormatName( GLenum format );
+
+extern void* CreateRandomData( ExplicitType type, size_t count, MTdata d );
+
+extern GLenum GetGLFormat(GLenum internalFormat);
+extern GLenum GetGLTypeForExplicitType(ExplicitType type);
+extern size_t GetGLTypeSize(GLenum type);
+extern ExplicitType GetExplicitTypeForGLType(GLenum type);
+
+extern GLenum get_base_gl_target( GLenum target );
+
+extern int init_clgl_ext( void );
+
+#endif // _helpers_h
+
+
+
--- a/test_conformance/compatibility/test_common/gl/setup.h
+++ b/test_conformance/compatibility/test_common/gl/setup.h
@@ -0,0 +1,48 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef _setup_h
+#define _setup_h
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "gl_headers.h"
+#ifdef __APPLE__
+#include <OpenCL/opencl.h>
+#else
+#include <CL/opencl.h>
+#endif
+
+
+// Note: the idea here is to have every platform define its own setup.cpp file that implements a GLEnvironment
+// subclass internally, then returns an instance of it from its definition of GLEnvironment::Instance
+
+// Abstract interface to the platform's OpenGL environment. Each platform's
+// setup_*.cpp supplies a concrete subclass together with the definition of
+// Instance().
+class GLEnvironment
+{
+    public:
+        GLEnvironment() {}
+        virtual ~GLEnvironment() {}
+
+        // Sets up the GL window/context. argc/argv are handed to glutInit by
+        // the platform implementations; use_opengl_32 selects a 3.2 core
+        // context on OS X. Implementations return 0 on success (the OS X one
+        // returns -1 on failure).
+         virtual int Init( int *argc, char **argv, int use_opengl_32 ) = 0;
+        // Creates a CL context sharing with the current GL context.
+        // Implementations return NULL/0 on failure.
+        virtual cl_context CreateCLContext( void ) = 0;
+        // Reports whether a device of device_type advertises the GL sharing
+        // extension: non-zero if one was found, 0 if none, -1 on a CL error.
+        virtual int SupportsCLGLInterop( cl_device_type device_type) = 0;
+
+        // Returns the platform's GLEnvironment object (defined per platform).
+        static GLEnvironment *    Instance( void );
+
+
+};
+
+#endif // _setup_h
--- a/test_conformance/compatibility/test_common/gl/setup_osx.cpp
+++ b/test_conformance/compatibility/test_common/gl/setup_osx.cpp
@@ -0,0 +1,156 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "setup.h"
+#include "../../test_common/harness/errorHelpers.h"
+#include <OpenGL/CGLDevice.h>
+
+// OS X implementation of GLEnvironment. Uses a GLUT window for the legacy GL
+// profile or a CGL context for the 3.2 core profile, and shares with OpenCL
+// through the CGL share group (cl_APPLE_gl_sharing).
+class OSXGLEnvironment : public GLEnvironment
+{
+    public:
+        OSXGLEnvironment()
+        {
+            mCGLContext = NULL;
+        }
+
+        // Creates the GL context.
+        //   argc/argv      forwarded to glutInit on the legacy path.
+        //   use_opengl_32  zero: create a GLUT window; non-zero: create a
+        //                  3.2 core profile CGL context and make it current.
+        // Returns 0 on success, -1 if the CGL context could not be created.
+        virtual int Init( int *argc, char **argv, int use_opengl_32 )
+        {
+            if (!use_opengl_32) {
+                // Create a GLUT window to render into
+                glutInit( argc, argv );
+                glutInitWindowSize( 512, 512 );
+                glutInitDisplayMode( GLUT_RGB | GLUT_DOUBLE );
+                glutCreateWindow( "OpenCL <-> OpenGL Test" );
+                return 0;
+            }
+
+            CGLPixelFormatAttribute attribs[] = {
+                kCGLPFAOpenGLProfile, (CGLPixelFormatAttribute)kCGLOGLPVersion_3_2_Core,
+                kCGLPFAAllowOfflineRenderers,
+                kCGLPFANoRecovery,
+                kCGLPFAAccelerated,
+                kCGLPFADoubleBuffer,
+                (CGLPixelFormatAttribute)0
+            };
+
+            CGLPixelFormatObj pix = NULL;
+            GLint npix = 0;
+            CGLError err = CGLChoosePixelFormat(attribs, &pix, &npix);
+            // A successful call may still yield no matching format (pix == NULL).
+            if (err != kCGLNoError || pix == NULL)
+            {
+                log_error("Failed to choose pixel format\n");
+                return -1;
+            }
+
+            err = CGLCreateContext(pix, NULL, &mCGLContext);
+            // The pixel format is only needed to create the context; release
+            // our reference whether or not creation succeeded.
+            CGLDestroyPixelFormat(pix);
+            if (err != kCGLNoError)
+            {
+                mCGLContext = NULL;
+                log_error("Failed to create GL context\n");
+                return -1;
+            }
+            CGLSetCurrentContext(mCGLContext);
+
+            return 0;
+        }
+
+        // Creates a CL context on the share group of the GL context and checks
+        // that every device in it advertises cl_APPLE_gl_sharing.
+        // Returns the context, or NULL on any failure (in which case a context
+        // that was already created is released again).
+        virtual cl_context CreateCLContext( void )
+        {
+            int error;
+
+            // On the GLUT path no context was stored by Init; pick up the
+            // current one.
+            if( mCGLContext == NULL )
+                mCGLContext = CGLGetCurrentContext();
+
+            CGLShareGroupObj share_group = CGLGetShareGroup(mCGLContext);
+            cl_context_properties properties[] = { CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE, (cl_context_properties)share_group, 0 };
+            cl_context context = clCreateContext(properties, 0, 0, 0, 0, &error);
+            if (error) {
+                print_error(error, "clCreateContext failed");
+                return NULL;
+            }
+
+            // Verify that all devices in the context support the required extension
+            cl_device_id devices[64];
+            size_t size_out = 0;
+            error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices), devices, &size_out);
+            if (error) {
+                print_error(error, "clGetContextInfo failed");
+                clReleaseContext(context);
+                return NULL;
+            }
+
+            char extensions[8192];
+            const int device_count = (int)(size_out/sizeof(cl_device_id));
+            for (int i=0; i<device_count; i++) {
+                error = clGetDeviceInfo(devices[i], CL_DEVICE_EXTENSIONS, sizeof(extensions), extensions, NULL);
+                if (error) {
+                    print_error(error, "clGetDeviceInfo failed");
+                    clReleaseContext(context);
+                    return NULL;
+                }
+
+                if (strstr(extensions, "cl_APPLE_gl_sharing") == NULL) {
+                    log_error("Device %d does not support required extension cl_APPLE_gl_sharing.\n", i);
+                    clReleaseContext(context);
+                    return NULL;
+                }
+            }
+            return context;
+        }
+
+        // Checks whether any device of device_type advertises
+        // cl_APPLE_gl_sharing.
+        // Returns 1 if at least one does, 0 if none does, -1 on a CL error.
+        virtual int SupportsCLGLInterop( cl_device_type device_type )
+        {
+            int found_valid_device = 0;
+            cl_device_id devices[64];
+            cl_uint num_of_devices = 0;
+            int error;
+            error = clGetDeviceIDs(NULL, device_type, 64, devices, &num_of_devices);
+            if (error) {
+                print_error(error, "clGetDeviceIDs failed");
+                return -1;
+            }
+
+            // num_of_devices is the number of matching devices available,
+            // which can exceed the 64 entries that fit in devices[].
+            const int devices_to_check = (num_of_devices < 64) ? (int)num_of_devices : 64;
+
+            char extensions[8192];
+            for (int i=0; i<devices_to_check; i++) {
+                error = clGetDeviceInfo(devices[i], CL_DEVICE_EXTENSIONS, sizeof(extensions), extensions, NULL);
+                if (error) {
+                    print_error(error, "clGetDeviceInfo failed");
+                    return -1;
+                }
+
+                if (strstr(extensions, "cl_APPLE_gl_sharing") == NULL) {
+                    log_info("Device %d of %d does not support required extension cl_APPLE_gl_sharing.\n", i, num_of_devices);
+                } else {
+                    log_info("Device %d of %d does support required extension cl_APPLE_gl_sharing.\n", i, num_of_devices);
+                    found_valid_device = 1;
+                }
+            }
+            return found_valid_device;
+        }
+
+        virtual ~OSXGLEnvironment()
+        {
+            // NOTE(review): on the GLUT path mCGLContext is the context picked
+            // up via CGLGetCurrentContext(), not one created here -- confirm
+            // that destroying it is intended.
+            if( mCGLContext != NULL )
+                CGLDestroyContext( mCGLContext );
+        }
+
+        // GL context created by Init (3.2 path) or adopted in CreateCLContext.
+        CGLContextObj mCGLContext;
+
+};
+
+// Returns the shared OSXGLEnvironment, constructing it on the first call.
+// The object is kept in a function-local static pointer and is never deleted
+// by this function.
+GLEnvironment * GLEnvironment::Instance( void )
+{
+    static OSXGLEnvironment * sInstance = NULL;
+
+    if( !sInstance )
+    {
+        sInstance = new OSXGLEnvironment();
+    }
+
+    return sInstance;
+}
--- a/test_conformance/compatibility/test_common/gl/setup_win32.cpp
+++ b/test_conformance/compatibility/test_common/gl/setup_win32.cpp
@@ -0,0 +1,204 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#define GL_GLEXT_PROTOTYPES
+
+#include "setup.h"
+#include "testBase.h"
+#include "../../test_common/harness/errorHelpers.h"
+
+#include <GL/gl.h>
+#include <GL/glut.h>
+#include <GL/glext.h>
+#include <GL/glut.h>
+#include <CL/cl_ext.h>
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)(
+    const cl_context_properties *properties,
+    cl_gl_context_info param_name,
+    size_t param_value_size,
+    void *param_value,
+    size_t *param_value_size_ret);
+
+// Rename references to this dynamically linked function to avoid
+// collision with static link version
+#define clGetGLContextInfoKHR clGetGLContextInfoKHR_proc
+static clGetGLContextInfoKHR_fn clGetGLContextInfoKHR;
+
+#define MAX_DEVICES 32
+
+// Windows (WGL) implementation of GLEnvironment. SupportsCLGLInterop() records
+// the platform and the cl_khr_gl_sharing capable devices; CreateCLContext()
+// then builds a CL context that shares with the current WGL context.
+class WGLEnvironment : public GLEnvironment
+{
+private:
+    cl_device_id m_devices[MAX_DEVICES];   // devices that advertise cl_khr_gl_sharing
+    int m_device_count;                    // number of valid entries in m_devices
+    cl_platform_id m_platform;             // platform queried by SupportsCLGLInterop
+
+public:
+    WGLEnvironment()
+    {
+        m_device_count = 0;
+        m_platform = 0;
+
+    }
+
+    // Creates a GLUT window (and with it the GL context) and initialises GLEW.
+    // use_opengl_32 is not used on this platform. Always returns 0.
+    virtual int Init( int *argc, char **argv, int use_opengl_32 )
+    {
+         // Create a GLUT window to render into
+        glutInit( argc, argv );
+        glutInitWindowSize( 512, 512 );
+        glutInitDisplayMode( GLUT_RGB | GLUT_DOUBLE );
+        glutCreateWindow( "OpenCL <-> OpenGL Test" );
+        glewInit();
+        return 0;
+    }
+
+    // Creates a CL context sharing with the current WGL context.
+    // Prefers the device currently driving the GL context when it is one of
+    // the devices recorded by SupportsCLGLInterop(); otherwise uses the first
+    // recorded device.
+    // Returns the context, or 0 on any failure.
+    virtual cl_context CreateCLContext( void )
+    {
+        HGLRC hGLRC = wglGetCurrentContext();
+        HDC hDC = wglGetCurrentDC();
+        cl_context_properties properties[] = {
+            CL_CONTEXT_PLATFORM, (cl_context_properties) m_platform,
+            CL_GL_CONTEXT_KHR,   (cl_context_properties) hGLRC,
+            CL_WGL_HDC_KHR,      (cl_context_properties) hDC,
+            0
+        };
+        cl_device_id devices[MAX_DEVICES];
+        size_t dev_size = 0;
+        cl_int status;
+
+        if (!hGLRC || !hDC) {
+            print_error(CL_INVALID_CONTEXT, "No GL context bound");
+            return 0;
+        }
+
+        // m_devices is only filled in by SupportsCLGLInterop(); without it
+        // the fallback device below would be read uninitialised.
+        if (m_device_count <= 0) {
+            print_error(CL_DEVICE_NOT_FOUND, "No cl_khr_gl_sharing device recorded; call SupportsCLGLInterop first");
+            return 0;
+        }
+
+        if (!clGetGLContextInfoKHR) {
+            // Ask OpenCL for the platforms.  Warn if more than one platform found,
+            // since this might not be the platform we want.  By default, we simply
+            // use the first returned platform.
+
+            cl_uint nplatforms = 0;
+            cl_platform_id platform = NULL;
+            status = clGetPlatformIDs(1, &platform, &nplatforms);
+            if (status != CL_SUCCESS || platform == NULL) {
+                print_error(status, "clGetPlatformIDs failed");
+                return 0;
+            }
+
+            if (nplatforms > 1) {
+                log_info("clGetPlatformIDs returned multiple values.  This is not "
+                    "an error, but might result in obtaining incorrect function "
+                    "pointers if you do not want the first returned platform.\n");
+
+                // Show them the platform name, in case it is a problem.
+                // This is purely informational, so failures here are skipped.
+
+                size_t size = 0;
+                if (clGetPlatformInfo(platform, CL_PLATFORM_NAME, 0, NULL, &size) == CL_SUCCESS && size > 0) {
+                    char *name = (char*)malloc(size);
+                    if (name != NULL) {
+                        if (clGetPlatformInfo(platform, CL_PLATFORM_NAME, size, name, NULL) == CL_SUCCESS) {
+                            log_info("Using platform with name: %s \n", name);
+                        }
+                        free(name);
+                    }
+                }
+            }
+
+            clGetGLContextInfoKHR = (clGetGLContextInfoKHR_fn) clGetExtensionFunctionAddressForPlatform(platform, "clGetGLContextInfoKHR");
+            if (!clGetGLContextInfoKHR) {
+                print_error(CL_INVALID_PLATFORM, "Failed to query proc address for clGetGLContextInfoKHR");
+                // Calling through the null pointer below would crash.
+                return 0;
+            }
+        }
+
+        status = clGetGLContextInfoKHR(properties,
+                                       CL_DEVICES_FOR_GL_CONTEXT_KHR,
+                                       sizeof(devices),
+                                       devices,
+                                       &dev_size);
+        if (status != CL_SUCCESS) {
+            print_error(status, "clGetGLContextInfoKHR failed");
+            return 0;
+        }
+        dev_size /= sizeof(cl_device_id);
+        // dev_size is a size_t; convert explicitly for the %d conversion.
+        log_info("GL context supports %d compute devices\n", (int)dev_size);
+
+        status = clGetGLContextInfoKHR(properties,
+                                       CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR,
+                                       sizeof(devices),
+                                       devices,
+                                       &dev_size);
+        if (status != CL_SUCCESS) {
+            print_error(status, "clGetGLContextInfoKHR failed");
+            return 0;
+        }
+
+        // Default to the first recorded device; switch to the GL context's
+        // current device when it is among the recorded ones.
+        cl_device_id ctxDevice = m_devices[0];
+        if (dev_size > 0) {
+            // cl_device_id is a pointer type, so print it with %p.
+            log_info("GL context current device: %p\n", (void *)devices[0]);
+            for (int i = 0; i < m_device_count; i++) {
+                if (m_devices[i] == devices[0]) {
+                    ctxDevice = devices[0];
+                    break;
+                }
+            }
+        } else {
+            log_info("GL context current device is not a CL device, using device %p.\n", (void *)ctxDevice);
+        }
+
+        cl_context context = clCreateContext(properties, 1, &ctxDevice, NULL, NULL, &status);
+        if (status != CL_SUCCESS) {
+            print_error(status, "clCreateContext failed");
+            return 0;
+        }
+        return context;
+    }
+
+    // Queries the first platform for devices of device_type and records those
+    // that advertise cl_khr_gl_sharing in m_devices.
+    // Returns non-zero if any device has been recorded, 0 if none, -1 on a
+    // CL error.
+    virtual int SupportsCLGLInterop( cl_device_type device_type )
+    {
+        cl_device_id devices[MAX_DEVICES];
+        cl_uint num_of_devices = 0;
+        int error;
+        error = clGetPlatformIDs(1, &m_platform, NULL);
+        if (error) {
+            print_error(error, "clGetPlatformIDs failed");
+            return -1;
+        }
+        error = clGetDeviceIDs(m_platform, device_type, MAX_DEVICES, devices, &num_of_devices);
+        if (error) {
+            print_error(error, "clGetDeviceIDs failed");
+            return -1;
+        }
+
+        // num_of_devices is the number of matching devices available, which
+        // can exceed the MAX_DEVICES entries that fit in devices[].
+        const int devices_to_check = (num_of_devices < (cl_uint)MAX_DEVICES) ? (int)num_of_devices : MAX_DEVICES;
+
+        // Check all devices, search for one that supports cl_khr_gl_sharing
+        char extensions[8192];
+        for (int i=0; i<devices_to_check; i++) {
+            error = clGetDeviceInfo(devices[i], CL_DEVICE_EXTENSIONS, sizeof(extensions), extensions, NULL);
+            if (error) {
+                print_error(error, "clGetDeviceInfo failed");
+                return -1;
+            }
+
+            if (strstr(extensions, "cl_khr_gl_sharing") == NULL) {
+                log_info("Device %d of %d does not support required extension cl_khr_gl_sharing.\n", i+1, num_of_devices);
+            } else {
+                log_info("Device %d of %d supports required extension cl_khr_gl_sharing.\n", i+1, num_of_devices);
+                // m_device_count persists across calls; never write past the array.
+                if (m_device_count < MAX_DEVICES) {
+                    m_devices[m_device_count++] = devices[i];
+                }
+            }
+        }
+        return m_device_count > 0;
+    }
+
+    virtual ~WGLEnvironment()
+    {
+    }
+};
+
+// Returns the shared WGLEnvironment, constructing it on the first call.
+// The object is kept in a function-local static pointer and is never deleted
+// by this function.
+GLEnvironment * GLEnvironment::Instance( void )
+{
+    static WGLEnvironment * sInstance = NULL;
+
+    if( !sInstance )
+    {
+        sInstance = new WGLEnvironment();
+    }
+
+    return sInstance;
+}
--- a/test_conformance/compatibility/test_common/gl/setup_x11.cpp
+++ b/test_conformance/compatibility/test_common/gl/setup_x11.cpp
@@ -0,0 +1,122 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#define GL_GLEXT_PROTOTYPES
+
+#include "setup.h"
+#include "testBase.h"
+#include "../../test_common/harness/errorHelpers.h"
+
+#include <GL/gl.h>
+#include <GL/glut.h>
+#include <GL/glext.h>
+#include <GL/freeglut.h>
+#include <GL/glx.h>
+#include <CL/cl_ext.h>
+
+// X11/GLX implementation of GLEnvironment. SupportsCLGLInterop() records the
+// cl_khr_gl_sharing capable devices; CreateCLContext() then builds a CL
+// context that shares with the current GLX context.
+class X11GLEnvironment : public GLEnvironment
+{
+private:
+    cl_device_id m_devices[64];   // devices that advertise cl_khr_gl_sharing
+    cl_uint m_device_count;       // number of valid entries in m_devices
+
+    // Returns true when `name` occurs in the space-separated extension string
+    // `list` as a complete entry, including when it is the last entry and has
+    // no trailing space.
+    static bool HasExtension( const char *list, const char *name )
+    {
+        const size_t len = strlen(name);
+        for (const char *p = strstr(list, name); p != NULL; p = strstr(p + len, name)) {
+            const bool starts_entry = (p == list) || (p[-1] == ' ');
+            const bool ends_entry = (p[len] == ' ') || (p[len] == '\0');
+            if (starts_entry && ends_entry)
+                return true;
+        }
+        return false;
+    }
+
+public:
+    X11GLEnvironment()
+    {
+        m_device_count = 0;
+    }
+
+    // Creates a GLUT window (and with it the GL context) and initialises GLEW.
+    // use_opengl_32 is not used on this platform. Always returns 0.
+    virtual int Init( int *argc, char **argv, int use_opengl_32 )
+    {
+         // Create a GLUT window to render into
+        glutInit( argc, argv );
+        glutInitWindowSize( 512, 512 );
+        glutInitDisplayMode( GLUT_RGB | GLUT_DOUBLE );
+        glutCreateWindow( "OpenCL <-> OpenGL Test" );
+        glewInit();
+        return 0;
+    }
+
+    // Creates a CL context sharing with the current GLX context, using the
+    // first device recorded by SupportsCLGLInterop().
+    // Returns the context, or 0 on any failure.
+    virtual cl_context CreateCLContext( void )
+    {
+        GLXContext context = glXGetCurrentContext();
+        Display *dpy = glXGetCurrentDisplay();
+
+        cl_context_properties properties[] = {
+            CL_GL_CONTEXT_KHR,  (cl_context_properties) context,
+            CL_GLX_DISPLAY_KHR, (cl_context_properties) dpy,
+            0
+        };
+        cl_int status;
+
+        if (!context || !dpy) {
+            print_error(CL_INVALID_CONTEXT, "No GL context bound");
+            return 0;
+        }
+
+        // m_devices is only filled in by SupportsCLGLInterop(); without it
+        // m_devices[0] would be read uninitialised.
+        if (m_device_count == 0) {
+            print_error(CL_DEVICE_NOT_FOUND, "No cl_khr_gl_sharing device recorded; call SupportsCLGLInterop first");
+            return 0;
+        }
+
+        cl_context cl_ctx = clCreateContext(properties, 1, m_devices, NULL, NULL, &status);
+        if (status != CL_SUCCESS) {
+            print_error(status, "clCreateContext failed");
+            return 0;
+        }
+        return cl_ctx;
+    }
+
+    // Queries the first platform for devices of device_type and records those
+    // that advertise cl_khr_gl_sharing in m_devices.
+    // Returns 1 if at least one such device was found by this call, 0 if none
+    // (including when the platform has no device of that type), -1 on a
+    // CL error.
+    virtual int SupportsCLGLInterop( cl_device_type device_type )
+    {
+        int found_valid_device = 0;
+        cl_platform_id platform;
+        cl_device_id devices[64];
+        cl_uint num_of_devices = 0;
+        int error;
+        error = clGetPlatformIDs(1, &platform, NULL);
+        if (error) {
+            print_error(error, "clGetPlatformIDs failed");
+            return -1;
+        }
+        error = clGetDeviceIDs(platform, device_type, 64, devices, &num_of_devices);
+        // If this platform doesn't have any of the requested device_type (namely GPUs) then return 0
+        if (error == CL_DEVICE_NOT_FOUND)
+          return 0;
+        if (error) {
+            print_error(error, "clGetDeviceIDs failed");
+            return -1;
+        }
+
+        // num_of_devices is the number of matching devices available, which
+        // can exceed the 64 entries that fit in devices[].
+        const int devices_to_check = (num_of_devices < 64) ? (int)num_of_devices : 64;
+
+        char extensions[8192];
+        for (int i=0; i<devices_to_check; i++) {
+            error = clGetDeviceInfo(devices[i], CL_DEVICE_EXTENSIONS, sizeof(extensions), extensions, NULL);
+            if (error) {
+                print_error(error, "clGetDeviceInfo failed");
+                return -1;
+            }
+
+            if (!HasExtension(extensions, "cl_khr_gl_sharing")) {
+                log_info("Device %d of %d does not support required extension cl_khr_gl_sharing.\n", i+1, num_of_devices);
+            } else {
+                log_info("Device %d of %d supports required extension cl_khr_gl_sharing.\n", i+1, num_of_devices);
+                found_valid_device = 1;
+                // m_device_count persists across calls; never write past the array.
+                if (m_device_count < 64) {
+                    m_devices[m_device_count++] = devices[i];
+                }
+            }
+        }
+        return found_valid_device;
+    }
+
+    virtual ~X11GLEnvironment()
+    {
+    }
+};
+
+// Returns the shared X11GLEnvironment, constructing it on the first call.
+// The object is kept in a function-local static pointer and is never deleted
+// by this function.
+GLEnvironment * GLEnvironment::Instance( void )
+{
+    static X11GLEnvironment * sInstance = NULL;
+
+    if( !sInstance )
+    {
+        sInstance = new X11GLEnvironment();
+    }
+
+    return sInstance;
+}
--- a/test_conformance/compatibility/test_common/harness/Jamfile
+++ b/test_conformance/compatibility/test_common/harness/Jamfile
@@ -0,0 +1,18 @@
+# Boost.Build project for the test harness sources in this directory.
+# The requirements add this directory to the include path and pass
+# "-xc++" (gcc) or "/TP" (msvc) so that the .c sources are compiled as C++.
+project
+    : requirements <include>.
+      <toolset>gcc:<cflags>"-xc++"
+      <toolset>msvc:<cflags>"/TP"
+      <warnings-as-errors>off
+    : usage-requirements <include>.
+    ;
+
+# Create one object target per .c/.cpp file found here, named after the
+# source file's base name.
+local harness.objs ;
+for source in [ glob *.c *.cpp ]
+{
+    harness.objs += [ obj $(source:B).obj : $(source) ] ;
+}
+
+# "harness" is an alias for all of those objects; it uses, and propagates to
+# its dependents, the /Runtime//OpenCL.lib target.
+alias harness : $(harness.objs)
+    : <use>/Runtime//OpenCL.lib : 
+    : <library>/Runtime//OpenCL.lib
+    ;
--- a/test_conformance/compatibility/test_common/harness/Makefile
+++ b/test_conformance/compatibility/test_common/harness/Makefile
@@ -0,0 +1,41 @@
+# Builds the object files of the test harness.
+# Defining BUILD_WITH_ATF adds the ATF framework and the USE_ATF define.
+ifdef BUILD_WITH_ATF
+ATF = -framework ATF
+USE_ATF = -DUSE_ATF
+endif
+
+# Harness sources.
+# NOTE(review): testHarness.c and testHarness.cpp both map to testHarness.o
+# through the OBJECTS substitutions below -- confirm both files are intended.
+SRCS = conversions.c \
+	errorHelpers.c \
+	genericThread.cpp \
+	imageHelpers.cpp \
+	kernelHelpers.c \
+	mt19937.c \
+	rounding_mode.c \
+	testHarness.c \
+	testHarness.cpp \
+	ThreadPool.c \
+	threadTesting.c \
+	typeWrappers.cpp
+		  
+# Each word in DEFINES is turned into a -D option by CFLAGS/CXXFLAGS below.
+DEFINES = DONT_TEST_GARBAGE_POINTERS
+
+SOURCES = $(abspath $(SRCS))
+LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries
+LIBPATH += -L.
+HEADERS = 
+INCLUDE = 
+COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32
+# The C compiler variable points at the C++ driver, so .c files are built with c++.
+CC = c++
+CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
+CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
+# NOTE(review): LIBPATH and LIBRARIES are not referenced by any rule in this file.
+LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF}
+
+# Derive the object list by replacing the .c and .cpp suffixes with .o.
+OBJECTS := ${SOURCES:.c=.o}
+OBJECTS := ${OBJECTS:.cpp=.o}
+
+# No explicit compile rule is given here; presumably make's built-in
+# suffix rules produce the objects.
+all: $(OBJECTS)
+
+clean:
+	rm -f $(OBJECTS)
+
+# Any target that has no rule ends up here and only prints a message.
+.DEFAULT:
+	@echo The target \"$@\" does not exist in Makefile.
--- a/test_conformance/compatibility/test_common/harness/ThreadPool.c
+++ b/test_conformance/compatibility/test_common/harness/ThreadPool.c
@@ -0,0 +1,899 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "ThreadPool.h"
+#include "errorHelpers.h"
+#include "fpcontrol.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+#if  defined( __APPLE__ ) || defined( __linux__ ) || defined( _WIN32 )  // or any other POSIX system
+
+#if defined( _WIN32 )
+#include <windows.h>
+#if defined(_MSC_VER)
+#include <intrin.h>
+#endif
+#include "mingw_compat.h"
+#include <process.h>
+#else // !_WIN32
+#include <pthread.h>
+#include <unistd.h>
+#include <sys/errno.h>
+#endif // !_WIN32
+
+// Forward declarations.
+// The worker entry point has a different signature per platform: it is handed
+// to _beginthread() on Windows and to pthread_create() elsewhere.
+#ifdef  _WIN32
+void ThreadPool_WorkerFunc( void *p );
+#else
+void *ThreadPool_WorkerFunc( void *p );
+#endif
+void ThreadPool_Init(void);
+void ThreadPool_Exit(void);
+
+// gAtomicLock only exists in builds that emulate atomic operations with a lock:
+// MinGW32 (critical section) and compilers that are neither GCC nor MSVC
+// (pthread mutex). Plain GCC and MSVC builds use compiler intrinsics and
+// declare no lock.
+#if defined (__MINGW32__)
+    // Mutex for implementing super heavy atomic operations if you don't have GCC or MSVC
+    CRITICAL_SECTION     gAtomicLock;
+#elif defined( __GNUC__ ) || defined( _MSC_VER)
+#else
+    pthread_mutex_t     gAtomicLock;
+#endif
+
+// Atomic add operator with mem barrier.  Mem barrier needed to protect state modified by the worker functions.
+// Adds b to *a and returns the value *a held before the addition.
+// The implementation is chosen at compile time: a critical section on MinGW32,
+// a compiler intrinsic on GCC and MSVC, and a pthread mutex anywhere else.
+cl_int ThreadPool_AtomicAdd( volatile cl_int *a, cl_int b )
+{
+#if defined (__MINGW32__)
+    // No atomics on Mingw32
+    // Serialize the read-modify-write through gAtomicLock instead.
+    EnterCriticalSection(&gAtomicLock);
+    cl_int old = *a;
+    *a = old + b;
+    LeaveCriticalSection(&gAtomicLock);
+    return old;
+#elif defined( __GNUC__ )
+    // GCC extension: http://gcc.gnu.org/onlinedocs/gcc/Atomic-Builtins.html#Atomic-Builtins
+    return __sync_fetch_and_add( a, b );
+    // NOTE(review): the original author was unsure whether an extra __sync_synchronize() is needed here.
+    // The GCC manual describes the __sync builtins as full barriers in most cases -- confirm for the targets in use.
+#elif defined( _MSC_VER )
+    return (cl_int) _InterlockedExchangeAdd( (volatile LONG*) a, (LONG) b );
+#else
+    #warning  Please add a atomic add implementation here, with memory barrier.  Fallback code is slow.
+    // Lock failures are only logged; the add is still performed.
+    if( pthread_mutex_lock(&gAtomicLock) )
+        log_error( "Atomic operation failed. pthread_mutex_lock(&gAtomicLock) returned an error\n");
+    cl_int old = *a;
+    *a = old + b;
+    if( pthread_mutex_unlock(&gAtomicLock) )
+        log_error( "Failed to release gAtomicLock. Further atomic operations may deadlock!\n");
+    return old;
+#endif
+}
+
+#if defined( _WIN32 )
+// Uncomment the following line if Windows XP support is not required.
+// #define HAS_INIT_ONCE_EXECUTE_ONCE 1
+
+#if defined(HAS_INIT_ONCE_EXECUTE_ONCE)
+// Use the native one-time initialization API.
+#define _INIT_ONCE           INIT_ONCE
+#define _PINIT_ONCE          PINIT_ONCE
+#define _InitOnceExecuteOnce InitOnceExecuteOnce
+#else // !HAS_INIT_ONCE_EXECUTE_ONCE
+
+// Minimal emulation of the one-time initialization API: the control word is a
+// LONG that moves UNINITIALIZED -> IN_PROGRESS -> DONE.
+typedef volatile LONG _INIT_ONCE;
+typedef _INIT_ONCE *_PINIT_ONCE;
+typedef BOOL (CALLBACK *_PINIT_ONCE_FN)(_PINIT_ONCE, PVOID, PVOID *);
+
+#define _INIT_ONCE_UNINITIALIZED 0
+#define _INIT_ONCE_IN_PROGRESS   1
+#define _INIT_ONCE_DONE          2
+
+// Runs InitFn once for the given control word.
+//
+// The thread that wins the UNINITIALIZED -> IN_PROGRESS exchange calls InitFn;
+// every other thread polls (sleeping 1 ms between checks) until the control
+// word reads DONE.
+//
+// Returns TRUE once initialization has completed. If InitFn reports failure,
+// the control word is put back to UNINITIALIZED so that a later call (or a
+// thread that is currently polling) can retry, and FALSE is returned to the
+// thread that ran the callback.
+static BOOL _InitOnceExecuteOnce(
+  _PINIT_ONCE InitOnce,
+  _PINIT_ONCE_FN InitFn,
+  PVOID Parameter,
+  LPVOID *Context
+)
+{
+    while ( *InitOnce != _INIT_ONCE_DONE )
+    {
+        // The cheap read avoids hammering the interlocked operation while
+        // another thread is already initializing.
+        if (*InitOnce != _INIT_ONCE_IN_PROGRESS && _InterlockedCompareExchange( InitOnce, _INIT_ONCE_IN_PROGRESS, _INIT_ONCE_UNINITIALIZED ) == _INIT_ONCE_UNINITIALIZED )
+        {
+            BOOL succeeded = InitFn( InitOnce, Parameter, Context );
+            *InitOnce = succeeded ? _INIT_ONCE_DONE : _INIT_ONCE_UNINITIALIZED;
+            return succeeded ? TRUE : FALSE;
+        }
+        Sleep( 1 );
+    }
+    return TRUE;
+}
+#endif // !HAS_INIT_ONCE_EXECUTE_ONCE
+
+// Uncomment the following line if Windows XP support is not required.
+// #define HAS_CONDITION_VARIABLE 1
+
+#if defined(HAS_CONDITION_VARIABLE)
+// Map the private names directly onto the native condition variable API.
+#define _CONDITION_VARIABLE          CONDITION_VARIABLE
+#define _InitializeConditionVariable InitializeConditionVariable
+#define _SleepConditionVariableCS    SleepConditionVariableCS
+#define _WakeAllConditionVariable    WakeAllConditionVariable
+#else // !HAS_CONDITION_VARIABLE
+// Emulated condition variable built from an event plus a critical section.
+typedef struct
+{
+    HANDLE           mEvent; // Used to park the thread.
+    CRITICAL_SECTION mLock[1]; // Used to protect mWaiters, mGeneration and mReleaseCount.
+    volatile cl_int  mWaiters; // Number of threads waiting on this cond var.
+    volatile cl_int  mGeneration; // Wait generation count.
+    volatile cl_int  mReleaseCount; // Number of releases to execute before resetting the event.
+} _CONDITION_VARIABLE;
+
+typedef _CONDITION_VARIABLE *_PCONDITION_VARIABLE;
+
+// Prepares an emulated condition variable for use: creates its event,
+// initializes its lock and zeroes all of its counters.
+static void _InitializeConditionVariable( _PCONDITION_VARIABLE cond_var )
+{
+    // Manual-reset event, initially non-signalled.
+    cond_var->mEvent = CreateEvent( NULL, TRUE, FALSE, NULL );
+    InitializeCriticalSection( cond_var->mLock );
+    cond_var->mWaiters = 0;
+    cond_var->mGeneration = 0;
+    // mReleaseCount is read and decremented by _SleepConditionVariableCS in
+    // every build, so it must be initialized regardless of NDEBUG.
+    cond_var->mReleaseCount = 0;
+}
+
+// Emulation of SleepConditionVariableCS: releases cond_lock, blocks until the
+// condition variable is woken, then re-acquires cond_lock before returning.
+// The third parameter (the timeout) is not used; the wait is always INFINITE.
+static void _SleepConditionVariableCS( _PCONDITION_VARIABLE cond_var, PCRITICAL_SECTION cond_lock, DWORD ignored)
+{
+    // Register as a waiter and remember the generation we went to sleep in.
+    EnterCriticalSection( cond_var->mLock );
+    cl_int generation = cond_var->mGeneration;
+    ++cond_var->mWaiters;
+    LeaveCriticalSection( cond_var->mLock );
+    LeaveCriticalSection( cond_lock );
+
+    while ( TRUE )
+    {
+        WaitForSingleObject( cond_var->mEvent, INFINITE );
+        // Only accept the wake-up if it belongs to a wake issued after we
+        // started waiting (generation changed) and releases are still pending.
+        // NOTE(review): the event is manual-reset, so a waiter that fails this
+        // test loops without blocking until the event is reset -- confirm this is acceptable.
+        EnterCriticalSection( cond_var->mLock );
+        BOOL done = cond_var->mReleaseCount > 0 && cond_var->mGeneration != generation;
+        LeaveCriticalSection( cond_var->mLock );
+        if ( done )
+        {
+            break;
+        }
+    }
+
+    // Re-acquire the caller's lock first, then consume one release; the last
+    // released waiter resets the event.
+    EnterCriticalSection( cond_lock );
+    EnterCriticalSection( cond_var->mLock );
+    if ( --cond_var->mReleaseCount == 0 )
+    {
+        ResetEvent( cond_var->mEvent );
+    }
+    --cond_var->mWaiters;
+    LeaveCriticalSection( cond_var->mLock );
+}
+
+// Emulation of WakeAllConditionVariable: if any thread is currently waiting,
+// starts a new wake generation, schedules one release per waiter and signals
+// the event. Does nothing when there are no waiters.
+static void _WakeAllConditionVariable( _PCONDITION_VARIABLE cond_var )
+{
+    EnterCriticalSection( cond_var->mLock );
+
+    const cl_int waiting = cond_var->mWaiters;
+    if ( 0 < waiting )
+    {
+        cond_var->mGeneration = cond_var->mGeneration + 1;
+        cond_var->mReleaseCount = waiting;
+        SetEvent( cond_var->mEvent );
+    }
+
+    LeaveCriticalSection( cond_var->mLock );
+}
+#endif // !HAS_CONDITION_VARIABLE
+#endif // _WIN32
+
+// Job ids handed to workers are always below MAX_COUNT; a gRunCount value at
+// or above it tells the workers to exit.
+#define MAX_COUNT   (1<<29)
+
+// Global state to coordinate whether the threads have been launched successfully or not
+#if defined( _MSC_VER ) && (_WIN32_WINNT >= 0x600)
+static _INIT_ONCE threadpool_init_control;
+#elif defined (_WIN32)  // MinGW or XP
+static int threadpool_init_control;
+#else // Posix platforms
+pthread_once_t threadpool_init_control = PTHREAD_ONCE_INIT;
+#endif
+cl_int threadPoolInitErr = -1;          // set to CL_SUCCESS on successful thread launch
+
+// critical region lock around ThreadPool_Do.  We can only run one ThreadPool_Do at a time,
+// because we are too lazy to set up a queue here, and don't expect to need one.
+#if defined( _WIN32 )
+CRITICAL_SECTION    gThreadPoolLock[1];
+#else // !_WIN32
+pthread_mutex_t     gThreadPoolLock;
+#endif // !_WIN32
+
+// Condition variable to park ThreadPool threads when not working
+#if defined( _WIN32 )
+CRITICAL_SECTION    cond_lock[1];
+_CONDITION_VARIABLE cond_var[1];
+#else // !_WIN32
+pthread_mutex_t     cond_lock;
+pthread_cond_t      cond_var;
+#endif // !_WIN32
+volatile cl_int     gRunCount = 0;              // Condition variable state. How many iterations on the function left to run.
+                                                // set to CL_INT_MAX to cause worker threads to exit. Note: this value might go negative.
+
+// State that only changes when the threadpool is not working.
+volatile TPFuncPtr  gFunc_ptr = NULL;           // job function of the current ThreadPool_Do call
+volatile void       *gUserInfo = NULL;          // opaque argument forwarded to gFunc_ptr
+volatile cl_int     gJobCount = 0;              // number of jobs requested by the current ThreadPool_Do call
+
+// State that may change while the thread pool is working
+volatile cl_int     jobError = CL_SUCCESS;      // err code return for the job as a whole
+
+// Condition variable to park caller while waiting
+#if defined( _WIN32 )
+HANDLE              caller_event;
+#else // !_WIN32
+pthread_mutex_t     caller_cond_lock;
+pthread_cond_t      caller_cond_var;
+#endif // !_WIN32
+volatile cl_int     gRunning = 0;       // # of threads intended to be running. Running threads will decrement this as they discover they've run out of work to do.
+
+// The total number of threads launched.
+volatile cl_int     gThreadCount = 0;
+// Entry point of every worker thread.
+//
+// p points at a shared counter; the value fetched from it (before incrementing)
+// becomes this worker's thread id. The worker then repeatedly claims a job by
+// atomically decrementing gRunCount: a claimed value in 1..MAX_COUNT-1 means
+// "run job (value - 1)", a value <= 0 means "no work, park on cond_var", and a
+// value >= MAX_COUNT means "exit".
+//
+// gRunning counts the workers that are not parked; the worker that takes it
+// from 1 to 0 wakes the thread blocked in ThreadPool_Do / ThreadPool_Init.
+//
+// The first non-zero result of a job is stored in jobError and gRunCount is
+// dropped to 0 so that no further jobs are started.
+#ifdef _WIN32
+void ThreadPool_WorkerFunc( void *p )
+#else
+void *ThreadPool_WorkerFunc( void *p )
+#endif
+{
+    cl_uint threadID = ThreadPool_AtomicAdd( (volatile cl_int *) p, 1 );
+    cl_int item = ThreadPool_AtomicAdd( &gRunCount, -1 );
+//    log_info( "ThreadPool_WorkerFunc start: gRunning = %d\n", gRunning );
+
+    while( MAX_COUNT > item )
+    {
+        cl_int err;
+
+        // check for more work to do
+        if( 0 >= item )
+        {
+//            log_info( "Thread %d has run out of work.\n", threadID );
+
+            // No work to do. Attempt to block waiting for work
+#if defined( _WIN32 )
+            EnterCriticalSection( cond_lock );
+#else // !_WIN32
+            if((err = pthread_mutex_lock( &cond_lock) ))
+            {
+                log_error("Error %d from pthread_mutex_lock. Worker %d unable to block waiting for work. ThreadPool_WorkerFunc failed.\n", err, threadID );
+                goto exit;
+            }
+#endif // !_WIN32
+
+            cl_int remaining = ThreadPool_AtomicAdd( &gRunning, -1 );
+//            log_info( "ThreadPool_WorkerFunc: gRunning = %d\n", remaining - 1 );
+            if( 1 == remaining )
+            { // last thread out signal the main thread to wake up
+#if defined( _WIN32 )
+                SetEvent( caller_event );
+#else // !_WIN32
+                // Every failure below releases the locks this thread holds
+                // before leaving, so the remaining workers are not blocked
+                // forever on cond_lock.
+                if((err = pthread_mutex_lock( &caller_cond_lock) ))
+                {
+                    log_error("Error %d from pthread_mutex_lock. Unable to wake caller.\n", err );
+                    pthread_mutex_unlock( &cond_lock );
+                    goto exit;
+                }
+                if( (err = pthread_cond_broadcast( &caller_cond_var )))
+                {
+                    log_error("Error %d from pthread_cond_broadcast. Unable to wake up main thread. ThreadPool_WorkerFunc failed.\n", err );
+                    pthread_mutex_unlock( &caller_cond_lock );
+                    pthread_mutex_unlock( &cond_lock );
+                    goto exit;
+                }
+                if((err = pthread_mutex_unlock( &caller_cond_lock) ))
+                {
+                    log_error("Error %d from pthread_mutex_unlock. Unable to wake caller.\n", err );
+                    pthread_mutex_unlock( &cond_lock );
+                    goto exit;
+                }
+#endif // !_WIN32
+            }
+
+            // loop in case we are woken only to discover that some other thread already did all the work
+            while( 0 >= item )
+            {
+#if defined( _WIN32 )
+                _SleepConditionVariableCS( cond_var, cond_lock, INFINITE );
+#else // !_WIN32
+                if((err = pthread_cond_wait( &cond_var, &cond_lock) ))
+                {
+                    log_error("Error %d from pthread_cond_wait. Unable to block for waiting for work. ThreadPool_WorkerFunc failed.\n", err );
+                    pthread_mutex_unlock( &cond_lock);
+                    goto exit;
+                }
+#endif // !_WIN32
+
+                // try again to get a valid item id
+                item = ThreadPool_AtomicAdd( &gRunCount, -1 );
+                if( MAX_COUNT <= item )  // exit if we are done
+                {
+#if defined( _WIN32 )
+                    LeaveCriticalSection( cond_lock );
+#else // !_WIN32
+                    pthread_mutex_unlock( &cond_lock);
+#endif // !_WIN32
+                    goto exit;
+                }
+            }
+
+            // We hold a job now, so count ourselves as running again before
+            // releasing cond_lock.
+            ThreadPool_AtomicAdd( &gRunning, 1 );
+//            log_info( "Thread %d has found work.\n", threadID);
+
+#if defined( _WIN32 )
+            LeaveCriticalSection( cond_lock );
+#else // !_WIN32
+            if((err = pthread_mutex_unlock( &cond_lock) ))
+            {
+                log_error("Error %d from pthread_mutex_unlock. Unable to block for waiting for work. ThreadPool_WorkerFunc failed.\n", err );
+                goto exit;
+            }
+#endif // !_WIN32
+
+        }
+
+        // we have a valid item, so do the work
+        if( CL_SUCCESS == jobError )  // but only if we haven't already encountered an error
+        {
+//            log_info( "Thread %d doing job %d\n", threadID, item - 1);
+
+#if defined(__APPLE__) && defined(__arm__)
+            // On most platforms which support denorm, default is FTZ off. However,
+            // on some hardware where the reference is computed, default might be flush denorms to zero e.g. arm.
+            // This creates issues in result verification. Since spec allows the implementation to either flush or
+            // not flush denorms to zero, an implementation may choose not be flush i.e. return denorm result whereas
+            // reference result may be zero (flushed denorm). Hence we need to disable denorm flushing on host side
+            // where reference is being computed to make sure we get non-flushed reference result. If implementation
+            // returns flushed result, we correctly take care of that in verification code.
+            FPU_mode_type oldMode;
+            DisableFTZ( &oldMode );
+#endif
+
+            // Call the user's function with this item ID
+            err = gFunc_ptr( item - 1, threadID, (void*) gUserInfo );
+#if defined(__APPLE__) && defined(__arm__)
+            // Restore FP state
+            RestoreFPState( &oldMode );
+#endif
+
+            if( err )
+            {
+                // Record the error only if no other job has failed first, then
+                // drop the run count so that no further jobs are started.
+#if defined( __MINGW32__ )
+                EnterCriticalSection(&gAtomicLock);
+                if( jobError == CL_SUCCESS )
+                    jobError = err;
+                gRunCount = 0;
+                LeaveCriticalSection(&gAtomicLock);
+#elif defined( __GNUC__ )
+                // GCC extension: http://gcc.gnu.org/onlinedocs/gcc/Atomic-Builtins.html#Atomic-Builtins
+                // set the new error if we are the first one there.
+                __sync_val_compare_and_swap( &jobError, CL_SUCCESS, err );
+
+                // drop run count to 0
+                gRunCount = 0;
+                __sync_synchronize();
+#elif defined( _MSC_VER )
+                // set the new error if we are the first one there.
+                _InterlockedCompareExchange( (volatile LONG*) &jobError, err, CL_SUCCESS );
+
+                // drop run count to 0
+                gRunCount = 0;
+                _mm_mfence();
+#else
+                if( pthread_mutex_lock(&gAtomicLock) )
+                    log_error( "Atomic operation failed. pthread_mutex_lock(&gAtomicLock) returned an error\n");
+                if( jobError == CL_SUCCESS )
+                    jobError = err;
+                gRunCount = 0;
+                if( pthread_mutex_unlock(&gAtomicLock) )
+                    log_error( "Failed to release gAtomicLock. Further atomic operations may deadlock\n");
+#endif
+            }
+        }
+
+        // get the next item
+        item = ThreadPool_AtomicAdd( &gRunCount, -1 );
+    }
+
+exit:
+    log_info( "ThreadPool: thread %d exiting.\n", threadID );
+    ThreadPool_AtomicAdd( &gThreadCount, -1 );
+#if !defined(_WIN32)
+    return NULL;
+#endif
+}
+
+// SetThreadCount() may be used to artificially set the number of worker threads.
+// If the value is 0 (the default) the number of threads will be determined based on
+// the number of CPU cores.  On a unicore machine 2 will be used, so that we still
+// get some testing for thread safety.
+//
+// If count < 2 or the CL_TEST_SINGLE_THREADED environment variable is set then the
+// code will run single threaded, but will report an error to indicate that the test
+// is invalid.  This option is intended for debugging purposes only. It is suggested
+// as a convention that test apps set the thread count to 1 in response to the -m flag.
+//
+// SetThreadCount() must be called before the first call to GetThreadCount() or ThreadPool_Do();
+// once the pool has been launched successfully the call is rejected and the process aborts.
+void        SetThreadCount( int count )
+{
+    if( CL_SUCCESS != threadPoolInitErr )
+    {
+        // The pool has not been launched yet: just record the request.
+        gThreadCount = count;
+        return;
+    }
+
+    log_error( "Error: It is illegal to set the thread count after the first call to ThreadPool_Do or GetThreadCount\n" );
+    abort();
+}
+
+// One-time setup of the thread pool; reached through the lazy initialization in
+// ThreadPool_Do() and GetThreadCount().
+//
+// Decides how many workers to run (gThreadCount if it was set, otherwise the
+// number of logical processors, with a minimum of 2), initializes the
+// synchronization objects, launches the workers and blocks until all of them
+// have parked waiting for work. On success threadPoolInitErr becomes
+// CL_SUCCESS; when the pool is disabled or cannot be set up it stays non-zero,
+// which makes ThreadPool_Do() run jobs on the calling thread.
+//
+// NOTE(review): if a thread fails to launch, gRunning still holds the
+// originally requested count, so the "last worker parked" signal that the wait
+// loop below depends on may never arrive, and threadPoolInitErr is overwritten
+// with CL_SUCCESS at the end. This path needs a dedicated fix and test.
+void ThreadPool_Init(void)
+{
+    cl_int i;
+    int err;
+    // Shared counter the workers draw their ids from. It lives on this stack
+    // frame; the wait loop below keeps the frame alive until every worker has
+    // read it.
+    volatile cl_uint threadID = 0;
+
+    // Check for manual override of multithreading code. We add this for better debuggability.
+    if( getenv( "CL_TEST_SINGLE_THREADED" ) )
+    {
+        log_error("ERROR: CL_TEST_SINGLE_THREADED is set in the environment. Running single threaded.\n*** TEST IS INVALID! ***\n");
+        gThreadCount = 1;
+        return;
+    }
+
+    // Figure out how many threads to run -- check first for non-zero to give the implementation the chance
+    if( 0 == gThreadCount )
+    {
+#if defined(_MSC_VER) || defined (__MINGW64__)
+        PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer = NULL;
+        DWORD length = 0;
+
+        // The first call only reports the required buffer size in length.
+        GetLogicalProcessorInformation( NULL, &length );
+        buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION) malloc( length );
+        if( buffer != NULL && GetLogicalProcessorInformation( buffer, &length ) == TRUE )
+        {
+            PSYSTEM_LOGICAL_PROCESSOR_INFORMATION ptr = buffer;
+            while( ptr < &buffer[ length / sizeof( SYSTEM_LOGICAL_PROCESSOR_INFORMATION ) ] )
+            {
+                if( ptr->Relationship == RelationProcessorCore )
+                {
+                    // Count the number of bits in ProcessorMask (number of logical cores).
+                    // ProcessorMask is pointer sized, so keep its full width.
+                    ULONG_PTR mask = ptr->ProcessorMask;
+                    while( mask )
+                    {
+                        ++gThreadCount;
+                        mask &= mask - 1; // Remove 1 bit at a time
+                    }
+                }
+                ++ptr;
+            }
+        }
+        // free(NULL) is a no-op, so this also covers the failure cases above.
+        free(buffer);
+#elif defined (__MINGW32__)
+        {
+            #warning  How about this, instead of hard coding it to 2?
+            SYSTEM_INFO sysinfo;
+            GetSystemInfo( &sysinfo );
+            gThreadCount = sysinfo.dwNumberOfProcessors;
+        }
+#else // !_WIN32
+        gThreadCount = (cl_int) sysconf(_SC_NPROCESSORS_CONF);       // Hopefully your system returns logical cpus here, as does MacOS X
+#endif // !_WIN32
+
+        // Multithreaded tests are required to run multithreaded even on unicore systems so as to test thread safety
+        if( 1 == gThreadCount )
+            gThreadCount = 2;
+    }
+
+    //Allow the app to set thread count to <0 for debugging purposes.  This will cause the test to run single threaded.
+    if( gThreadCount < 2 )
+    {
+        log_error( "ERROR: Running single threaded because thread count < 2. \n*** TEST IS INVALID! ***\n");
+        gThreadCount = 1;
+        return;
+    }
+
+#if defined( _WIN32 )
+    InitializeCriticalSection( gThreadPoolLock );
+    InitializeCriticalSection( cond_lock );
+    _InitializeConditionVariable( cond_var );
+    caller_event = CreateEvent( NULL, FALSE, FALSE, NULL );
+#else // !_WIN32
+    // Don't rely on PTHREAD_MUTEX_INITIALIZER for initialization of a mutex since it might cause problems
+    // with some flavors of gcc compilers. Every non-Windows build uses these pthread objects, so they
+    // are initialized regardless of the compiler.
+    pthread_cond_init(&cond_var, NULL);
+    pthread_mutex_init(&cond_lock ,NULL);
+    pthread_cond_init(&caller_cond_var, NULL);
+    pthread_mutex_init(&caller_cond_lock, NULL);
+    pthread_mutex_init(&gThreadPoolLock, NULL);
+#endif
+
+    // The lock that backs the emulated atomics only exists on MinGW32 and on
+    // compilers that are neither GCC nor MSVC.
+#if !(defined(__GNUC__) || defined(_MSC_VER) || defined(__MINGW32__))
+    pthread_mutex_init(&gAtomicLock, NULL);
+#elif defined (__MINGW32__)
+    InitializeCriticalSection(&gAtomicLock);
+#endif
+    // Make sure the last thread done in the work pool doesn't signal us to wake before we get to the point where we are supposed to wait
+    //  That would cause a deadlock.
+#if !defined( _WIN32 )
+    if((err = pthread_mutex_lock( &caller_cond_lock) ))
+    {
+        log_error("Error %d from pthread_mutex_lock. Unable to block for work to finish. ThreadPool_Init failed.\n", err );
+        gThreadCount = 1;
+        return;
+    }
+#endif // !_WIN32
+
+    gRunning = gThreadCount;
+    // init threads
+    for( i = 0; i < gThreadCount; i++ )
+    {
+#if defined( _WIN32 )
+        uintptr_t handle = _beginthread(ThreadPool_WorkerFunc, 0, (void*) &threadID);
+        err = ( handle == 0 );
+#else // !_WIN32
+        pthread_t tid = 0;
+        err = pthread_create( &tid, NULL, ThreadPool_WorkerFunc, (void*) &threadID );
+#endif // !_WIN32
+        if( err )
+        {
+            log_error( "Error %d launching thread %d\n", err, i );
+            threadPoolInitErr = err;
+            gThreadCount = i;
+            break;
+        }
+    }
+
+    atexit( ThreadPool_Exit );
+
+// block until they are done launching.
+    // Each worker decrements gRunCount once before parking, so all of them
+    // have arrived when gRunCount equals -gThreadCount.
+    do
+    {
+#if defined( _WIN32 )
+        WaitForSingleObject( caller_event, INFINITE );
+#else // !_WIN32
+        if((err = pthread_cond_wait( &caller_cond_var, &caller_cond_lock) ))
+        {
+            log_error("Error %d from pthread_cond_wait. Unable to block for work to finish. ThreadPool_Init failed.\n", err );
+            pthread_mutex_unlock( &caller_cond_lock);
+            return;
+        }
+#endif // !_WIN32
+    }
+    while( gRunCount != -gThreadCount );
+#if !defined( _WIN32 )
+    if((err = pthread_mutex_unlock( &caller_cond_lock) ))
+    {
+        log_error("Error %d from pthread_mutex_unlock. Unable to block for work to finish. ThreadPool_Init failed.\n", err );
+        return;
+    }
+#endif // !_WIN32
+
+    threadPoolInitErr = CL_SUCCESS;
+}
+
+#if defined(_MSC_VER)
+// Adapter that gives ThreadPool_Init() the callback signature expected by
+// _InitOnceExecuteOnce(). None of the callback arguments are used and the
+// callback always reports success.
+static BOOL CALLBACK _ThreadPool_Init(_PINIT_ONCE InitOnce, PVOID Parameter, PVOID *lpContex)
+{
+    (void) InitOnce;
+    (void) Parameter;
+    (void) lpContex;
+
+    ThreadPool_Init();
+
+    return TRUE;
+}
+#endif
+
+// Asks every worker to exit and waits up to roughly one second for them to go.
+// Registered with atexit() by ThreadPool_Init().
+//
+// gRunCount is set to CL_INT_MAX, which the workers interpret as the exit
+// request; the parked workers are then woken repeatedly (about once per
+// millisecond, at most 1000 times) until gThreadCount drops to zero.
+void ThreadPool_Exit(void)
+{
+    int err, count;
+
+    // Post the exit request and make it visible to the workers.
+    gRunCount = CL_INT_MAX;
+
+#if defined( __GNUC__ )
+    // GCC extension: http://gcc.gnu.org/onlinedocs/gcc/Atomic-Builtins.html#Atomic-Builtins
+    __sync_synchronize();
+#elif defined( _MSC_VER )
+    _mm_mfence();
+#else
+    #warning   If this is a weakly ordered memory system, please add a memory barrier here to force this and everything else to memory before we proceed
+#endif
+
+    // spin waiting for threads to die
+    count = 0;
+    while( 0 != gThreadCount && count < 1000 )
+    {
+#if defined( _WIN32 )
+        _WakeAllConditionVariable( cond_var );
+        Sleep(1);
+#else // !_WIN32
+        err = pthread_cond_broadcast( &cond_var );
+        if( err )
+        {
+            log_error("Error %d from pthread_cond_broadcast. Unable to wake up work threads. ThreadPool_Exit failed.\n", err );
+            break;
+        }
+        usleep(1000);
+#endif // !_WIN32
+        count++;
+    }
+
+    if( 0 == gThreadCount )
+        log_info( "Thread pool exited in a orderly fashion.\n" );
+    else
+        log_error( "Error: Thread pool timed out after 1 second with %d threads still active.\n", gThreadCount );
+}
+
+
+// Blocking API that farms out count jobs to a thread pool.
+// It may return with some work undone if func_ptr() returns a non-zero
+// result.
+//
+// func_ptr is called as func_ptr( job_id, thread_id, userInfo ) with job ids
+// 0 .. count-1. Returns the first non-zero value reported by a job, CL_SUCCESS
+// if every job returned zero (or count is 0), -1 if count >= MAX_COUNT, or the
+// error code of a failing threading primitive.
+//
+// This function obviously has its shortcomings. Only one call to ThreadPool_Do
+// can be running at a time. It is not intended for general purpose use.
+// If clEnqueueNativeKernelFn, out of order queues and a CL_DEVICE_TYPE_CPU were
+// all available then it would make more sense to use those features.
+cl_int ThreadPool_Do( TPFuncPtr func_ptr,
+                      cl_uint count,
+                      void *userInfo )
+{
+    cl_int newErr;
+    cl_int err = 0;
+    // Lazily set up our threads
+#if defined(_MSC_VER) && (_WIN32_WINNT >= 0x600)
+    err = !_InitOnceExecuteOnce( &threadpool_init_control, _ThreadPool_Init, NULL, NULL );
+#elif defined (_WIN32)
+    if (threadpool_init_control == 0) {
+    #warning  This is buggy and race prone.  Find a better way.
+        ThreadPool_Init();
+        threadpool_init_control = 1;
+    }
+#else //posix platform
+    err = pthread_once( &threadpool_init_control, ThreadPool_Init );
+    if( err )
+    {
+        log_error("Error %d from pthread_once. Unable to init threads. ThreadPool_Do failed.\n", err );
+        return err;
+    }
+#endif
+    // Single threaded code to handle case where threadpool wasn't allocated or was disabled by environment variable
+    if( threadPoolInitErr )
+    {
+        cl_uint currentJob = 0;
+        cl_int  result = CL_SUCCESS;
+
+#if defined(__APPLE__) && defined(__arm__)
+        // On most platforms which support denorm, default is FTZ off. However,
+        // on some hardware where the reference is computed, default might be flush denorms to zero e.g. arm.
+        // This creates issues in result verification. Since spec allows the implementation to either flush or
+        // not flush denorms to zero, an implementation may choose not be flush i.e. return denorm result whereas
+        // reference result may be zero (flushed denorm). Hence we need to disable denorm flushing on host side
+        // where reference is being computed to make sure we get non-flushed reference result. If implementation
+        // returns flushed result, we correctly take care of that in verification code.
+        FPU_mode_type oldMode;
+        DisableFTZ( &oldMode );
+#endif
+        for( currentJob = 0; currentJob < count; currentJob++ )
+            if((result = func_ptr( currentJob, 0, userInfo )))
+            {
+#if defined(__APPLE__) && defined(__arm__)
+                // Restore FP state before leaving
+                RestoreFPState( &oldMode );
+#endif
+                return result;
+            }
+
+#if defined(__APPLE__) && defined(__arm__)
+        // Restore FP state before leaving
+        RestoreFPState( &oldMode );
+#endif
+
+        return CL_SUCCESS;
+    }
+
+    // With no jobs, no worker would ever leave its parked state and signal us,
+    // so the wait below would never return. There is nothing to do anyway.
+    if( 0 == count )
+        return CL_SUCCESS;
+
+    if( count >= MAX_COUNT )
+    {
+        log_error("Error: ThreadPool_Do count %u >= max threadpool count of %d\n", count, MAX_COUNT );
+        return -1;
+    }
+
+    // Enter critical region
+#if defined( _WIN32 )
+    EnterCriticalSection( gThreadPoolLock );
+#else // !_WIN32
+    if( (err = pthread_mutex_lock( &gThreadPoolLock )))
+    {
+        switch (err)
+        {
+            case EDEADLK:
+                log_error("Error EDEADLK returned in ThreadPool_Do(). ThreadPool_Do is not designed to work recursively!\n" );
+                break;
+            case EINVAL:
+                log_error("Error EINVAL returned in ThreadPool_Do(). How did we end up with an invalid gThreadPoolLock?\n" );
+                break;
+            default:
+                break;
+        }
+        return err;
+    }
+#endif // !_WIN32
+
+    // Start modifying the job state observable by worker threads
+#if defined( _WIN32 )
+    EnterCriticalSection( cond_lock );
+#else // !_WIN32
+    if((err = pthread_mutex_lock( &cond_lock) ))
+    {
+        log_error("Error %d from pthread_mutex_lock. Unable to wake up work threads. ThreadPool_Do failed.\n", err );
+        goto exit;
+    }
+#endif // !_WIN32
+
+    // Make sure the last thread done in the work pool doesn't signal us to wake before we get to the point where we are supposed to wait
+    //  That would cause a deadlock.
+#if !defined( _WIN32 )
+    if((err = pthread_mutex_lock( &caller_cond_lock) ))
+    {
+        log_error("Error %d from pthread_mutex_lock. Unable to block for work to finish. ThreadPool_Do failed.\n", err );
+        // Do not leave with cond_lock held, or the workers could never park or wake again.
+        pthread_mutex_unlock( &cond_lock );
+        goto exit;
+    }
+#endif // !_WIN32
+
+    // Prime the worker threads to get going
+    jobError = CL_SUCCESS;
+    gRunCount = gJobCount = count;
+    gFunc_ptr = func_ptr;
+    gUserInfo = userInfo;
+
+#if defined( _WIN32 )
+    _WakeAllConditionVariable( cond_var );
+    LeaveCriticalSection( cond_lock );
+#else // !_WIN32
+    if( (err = pthread_cond_broadcast( &cond_var )))
+    {
+        log_error("Error %d from pthread_cond_broadcast. Unable to wake up work threads. ThreadPool_Do failed.\n", err );
+        // Release both locks taken above before bailing out.
+        pthread_mutex_unlock( &caller_cond_lock );
+        pthread_mutex_unlock( &cond_lock );
+        goto exit;
+    }
+    if((err = pthread_mutex_unlock( &cond_lock) ))
+    {
+        log_error("Error %d from pthread_mutex_unlock. Unable to wake up work threads. ThreadPool_Do failed.\n", err );
+        pthread_mutex_unlock( &caller_cond_lock );
+        goto exit;
+    }
+#endif // !_WIN32
+
+// block until they are done.  It would be slightly more efficient to do some of the work here though.
+    do
+    {
+#if defined( _WIN32 )
+        WaitForSingleObject( caller_event, INFINITE );
+#else // !_WIN32
+        if((err = pthread_cond_wait( &caller_cond_var, &caller_cond_lock) ))
+        {
+            log_error("Error %d from pthread_cond_wait. Unable to block for work to finish. ThreadPool_Do failed.\n", err );
+            pthread_mutex_unlock( &caller_cond_lock);
+            goto exit;
+        }
+#endif // !_WIN32
+    }
+    while( gRunning );
+#if !defined(_WIN32)
+    if((err = pthread_mutex_unlock( &caller_cond_lock) ))
+    {
+        log_error("Error %d from pthread_mutex_unlock. Unable to block for work to finish. ThreadPool_Do failed.\n", err );
+        goto exit;
+    }
+#endif // !_WIN32
+
+    // All workers are parked again: report the first error any job returned.
+    err = jobError;
+
+exit:
+    // exit critical region
+#if defined( _WIN32 )
+    LeaveCriticalSection( gThreadPoolLock );
+#else // !_WIN32
+    newErr = pthread_mutex_unlock( &gThreadPoolLock );
+    if( newErr)
+    {
+        log_error("Error %d from pthread_mutex_unlock. Unable to exit critical region. ThreadPool_Do failed.\n", newErr );
+        return err;
+    }
+#endif // !_WIN32
+
+    return err;
+}
+
+// Returns the number of threads that jobs may run on, initializing the thread
+// pool first if that has not happened yet. The result is never less than 1;
+// 1 is also returned when the pool cannot be initialized.
+cl_uint GetThreadCount( void )
+{
+    // Lazily set up our threads
+#if defined(_MSC_VER) && (_WIN32_WINNT >= 0x600)
+    cl_int err = !_InitOnceExecuteOnce( &threadpool_init_control, _ThreadPool_Init, NULL, NULL );
+#elif defined (_WIN32)
+    if (threadpool_init_control == 0) {
+    #warning  This is buggy and race prone.  Find a better way.
+        ThreadPool_Init();
+        threadpool_init_control = 1;
+    }
+#else
+    cl_int err = pthread_once( &threadpool_init_control, ThreadPool_Init );
+    if( err )
+    {
+        // An error code is not a thread count: report the failure and fall
+        // back to the single threaded answer.
+        log_error("Error %d from pthread_once. Unable to init threads. GetThreadCount failed.\n", err );
+        return 1;
+    }
+#endif // !_WIN32
+
+    if( gThreadCount < 1 )
+        return 1;
+
+    return gThreadCount;
+}
+
+#else
+
+#ifndef MY_OS_REALLY_REALLY_DOESNT_SUPPORT_THREADS
+    #error ThreadPool implementation has not been multithreaded for this operating system. You must multithread this section.
+#endif
+//
+// We require multithreading in parts of the test as a means of simultaneously testing reentrancy requirements
+// of OpenCL API, while also checking
+//
+// A sample single threaded implementation follows, for documentation / bootstrapping purposes.
+// It is not okay to use this for conformance testing!!!
+//
+// Exception:  If your operating system does not support multithreaded execution of any kind, then you may use this code.
+//
+
+// Single threaded stand-in for the atomic add: adds b to *a and returns the
+// value *a held before the addition.
+cl_int ThreadPool_AtomicAdd( volatile cl_int *a, cl_int b )
+{
+    // since this fallback code path is not multithreaded, a plain
+    // read-modify-write is enough here.
+    // If your operating system supports memory-barrier-atomics, use those here
+    const cl_uint previous = *a;
+    const cl_uint updated = previous + b;
+
+    *a = updated;
+    return previous;
+}
+
+// Blocking API that runs count jobs, one after another, on the calling thread.
+// It stops early and returns the job's result as soon as func_ptr() returns a
+// non-zero value; otherwise it returns CL_SUCCESS.
+cl_int ThreadPool_Do(   TPFuncPtr func_ptr,
+                        cl_uint count,
+                        void *userInfo )
+{
+    cl_uint jobIndex;
+    cl_int  status;
+
+#ifndef MY_OS_REALLY_REALLY_DOESNT_SUPPORT_THREADS
+    // THIS FUNCTION IS NOT INTENDED FOR USE!!
+    log_error( "ERROR:  Test must be multithreaded!\n" );
+    exit(-1);
+#else
+    // Print the warning only on the first call.
+    static int alreadyWarned = 0;
+
+    if( !alreadyWarned )
+    {
+        alreadyWarned = 1;
+        log_info( "\nWARNING:  The operating system is claimed not to support threads of any sort. Running single threaded.\n" );
+    }
+#endif
+
+// The multithreaded code should mimic this behavior:
+    for( jobIndex = 0; jobIndex < count; ++jobIndex )
+    {
+        status = func_ptr( jobIndex, 0, userInfo );
+        if( status != CL_SUCCESS )
+            return status;
+    }
+
+    return CL_SUCCESS;
+}
+
+// Single threaded fallback: the number of threads is always 1.
+cl_uint GetThreadCount( void )
+{
+    return 1;
+}
+
+// Single threaded fallback: the request cannot be honored, so a count above 1
+// is only reported and then ignored.
+void SetThreadCount( int count )
+{
+    if( count <= 1 )
+        return;
+
+    log_info( "WARNING: SetThreadCount(%d) ignored\n", count );
+}
+
+#endif
--- a/test_conformance/compatibility/test_common/harness/ThreadPool.h
+++ b/test_conformance/compatibility/test_common/harness/ThreadPool.h
@@ -0,0 +1,76 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef THREAD_POOL_H
+#define THREAD_POOL_H
+
+#if defined( __APPLE__ )
+    #include <OpenCL/opencl.h>
+#else
+    #include <CL/cl.h>
+#endif
+
+#if defined(__cplusplus)
+    extern "C" {
+#endif
+
+//
+// An atomic add operator
+cl_int     ThreadPool_AtomicAdd( volatile cl_int *a, cl_int b );    // returns old value
+
+// Your function prototype
+//
+// A function pointer to the function you want to execute in a multithreaded context.  No
+// synchronization primitives are provided, other than the atomic add above. You may not
+// call ThreadPool_Do from your function. ThreadPool_AtomicAdd() and GetThreadCount() should
+// work, however.
+//
+// Job ids and thread ids are 0 based.  If the number of jobs or threads is 8, they will be numbered 0 through 7.
+// Note that while every job will be run, it is not guaranteed that every thread will wake up before
+// the work is done.
+typedef cl_int (*TPFuncPtr)( cl_uint /*job_id*/, cl_uint /* thread_id */, void *userInfo );
+
+// returns first non-zero result from func_ptr, or CL_SUCCESS if all are zero.
+// Some workitems may not run if a non-zero result is returned from func_ptr().
+// This function may not be called from a TPFuncPtr.
+cl_int      ThreadPool_Do(  TPFuncPtr func_ptr,
+                            cl_uint count,
+                            void *userInfo );
+
+// Returns the number of worker threads that underlie the threadpool.  The value passed
+// as the TPFuncPtrs thread_id will be between 0 and this value less one, inclusive.
+// This is safe to call from a TPFuncPtr.
+cl_uint     GetThreadCount( void );
+
+// SetThreadCount() may be used to artificially set the number of worker threads.
+// If the value is 0 (the default) the number of threads will be determined based on
+// the number of CPU cores.  If it is a unicore machine, then 2 will be used, so
+// that we still get some testing for thread safety.
+//
+// If count < 2 or the CL_TEST_SINGLE_THREADED environment variable is set then the
+// code will run single threaded, but will report an error to indicate that the test
+// is invalid.  This option is intended for debugging purposes only. It is suggested
+// as a convention that test apps set the thread count to 1 in response to the -m flag.
+//
+// SetThreadCount() must be called before the first call to GetThreadCount() or ThreadPool_Do(),
+// otherwise the behavior is undefined. It may not be called from a TPFuncPtr.
+void        SetThreadCount( int count );
+
+#ifdef __cplusplus
+    }   /* extern "C" */
+#endif
+
+
+#endif  /* THREAD_POOL_H  */
--- a/test_conformance/compatibility/test_common/harness/clImageHelper.h
+++ b/test_conformance/compatibility/test_common/harness/clImageHelper.h
@@ -0,0 +1,253 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef test_conformance_clImageHelper_h
+#define test_conformance_clImageHelper_h
+
+#ifdef __APPLE__
+#include <OpenCL/opencl.h>
+#else
+#include <CL/cl.h>
+#endif
+
+#include <stdio.h>
+#include "errorHelpers.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+  // Creates a 2D image object. Wraps clCreateImage when the OpenCL 1.2
+  // headers are in use and clCreateImage2D otherwise, so code written against
+  // the 1.1 entry point builds with either header version.
+  //
+  // All parameters are forwarded unchanged to the underlying call. When
+  // errcode_ret is non-NULL and reports a non-zero status, the failure is
+  // logged at info level; raising an error is left to the caller.
+
+  inline cl_mem create_image_2d  (cl_context context,
+                           cl_mem_flags flags,
+                           const cl_image_format *image_format,
+                           size_t image_width,
+                           size_t image_height,
+                           size_t image_row_pitch,
+                           void *host_ptr,
+                           cl_int *errcode_ret)
+  {
+    cl_mem image = NULL;
+
+#ifdef CL_VERSION_1_2
+    cl_image_desc desc;
+
+    // Fields that describe a 2D image.
+    desc.image_type = CL_MEM_OBJECT_IMAGE2D;
+    desc.image_width = image_width;
+    desc.image_height = image_height;
+    desc.image_row_pitch = image_row_pitch;
+
+    // Fields that are not used for a 2D image.
+    desc.image_depth = 0;
+    desc.image_array_size = 0;
+    desc.image_slice_pitch = 0;
+    desc.num_mip_levels = 0;
+    desc.num_samples = 0;
+    desc.buffer = NULL; // only CL_MEM_OBJECT_IMAGE1D_BUFFER takes a buffer
+
+    image = clCreateImage( context, flags, image_format, &desc, host_ptr, errcode_ret );
+#else
+    image = clCreateImage2D( context, flags, image_format, image_width, image_height, image_row_pitch, host_ptr, errcode_ret );
+#endif
+
+    // Log an info message and rely on the calling function to produce an
+    // error if necessary.
+    if (errcode_ret != NULL && *errcode_ret != CL_SUCCESS) {
+#ifdef CL_VERSION_1_2
+      log_info("clCreateImage failed (%d)\n", *errcode_ret);
+#else
+      log_info("clCreateImage2D failed (%d)\n", *errcode_ret);
+#endif
+    }
+
+    return image;
+  }
+
+  // Creates a 3D image object. Wraps clCreateImage when the OpenCL 1.2
+  // headers are in use and clCreateImage3D otherwise. All parameters are
+  // forwarded unchanged; a non-zero status in *errcode_ret is logged at info
+  // level and left for the caller to act on.
+  inline cl_mem create_image_3d (cl_context context,
+                          cl_mem_flags flags,
+                          const cl_image_format *image_format,
+                          size_t image_width,
+                          size_t image_height,
+                          size_t image_depth,
+                          size_t image_row_pitch,
+                          size_t image_slice_pitch,
+                          void *host_ptr,
+                          cl_int *errcode_ret)
+  {
+    cl_mem image;
+
+#ifdef CL_VERSION_1_2
+    cl_image_desc desc;
+
+    // Fields that describe a 3D image.
+    desc.image_type = CL_MEM_OBJECT_IMAGE3D;
+    desc.image_width = image_width;
+    desc.image_height = image_height;
+    desc.image_depth = image_depth;
+    desc.image_row_pitch = image_row_pitch;
+    desc.image_slice_pitch = image_slice_pitch;
+
+    // Fields that are not used for a single 3D image.
+    desc.image_array_size = 0;
+    desc.num_mip_levels = 0;
+    desc.num_samples = 0;
+    desc.buffer = NULL; // only CL_MEM_OBJECT_IMAGE1D_BUFFER takes a buffer
+
+    image = clCreateImage( context, flags, image_format, &desc, host_ptr, errcode_ret );
+#else
+    image = clCreateImage3D( context, flags, image_format,
+                             image_width, image_height, image_depth,
+                             image_row_pitch, image_slice_pitch,
+                             host_ptr, errcode_ret );
+#endif
+
+    // Log an info message and rely on the calling function to produce an
+    // error if necessary.
+    if (errcode_ret != NULL && *errcode_ret != CL_SUCCESS) {
+#ifdef CL_VERSION_1_2
+      log_info("clCreateImage failed (%d)\n", *errcode_ret);
+#else
+      log_info("clCreateImage3D failed (%d)\n", *errcode_ret);
+#endif
+    }
+
+    return image;
+  }
+
+    // Creates a 2D image array through clCreateImage. All parameters are
+    // forwarded unchanged; image_depth is set to 1. A non-zero status in
+    // *errcode_ret is logged at info level and left for the caller to act on.
+    inline cl_mem create_image_2d_array (cl_context context,
+                                   cl_mem_flags flags,
+                                   const cl_image_format *image_format,
+                                   size_t image_width,
+                                   size_t image_height,
+                                   size_t image_array_size,
+                                   size_t image_row_pitch,
+                                   size_t image_slice_pitch,
+                                   void *host_ptr,
+                                   cl_int *errcode_ret)
+    {
+        cl_image_desc desc;
+        cl_mem image;
+
+        desc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY;
+
+        // Extent of each slice and number of slices.
+        desc.image_width = image_width;
+        desc.image_height = image_height;
+        desc.image_depth = 1;
+        desc.image_array_size = image_array_size;
+
+        // Host memory layout.
+        desc.image_row_pitch = image_row_pitch;
+        desc.image_slice_pitch = image_slice_pitch;
+
+        desc.num_mip_levels = 0;
+        desc.num_samples = 0;
+        desc.buffer = NULL;
+
+        image = clCreateImage( context, flags, image_format, &desc, host_ptr, errcode_ret );
+
+        // Log an info message and rely on the calling function to produce an
+        // error if necessary.
+        if (errcode_ret != NULL && *errcode_ret != CL_SUCCESS) {
+            log_info("clCreateImage failed (%d)\n", *errcode_ret);
+        }
+
+        return image;
+    }
+
+    // Creates a 1D image array through clCreateImage. All parameters are
+    // forwarded unchanged; image_height and image_depth are set to 1. A
+    // non-zero status in *errcode_ret is logged at info level and left for
+    // the caller to act on.
+    inline cl_mem create_image_1d_array (cl_context context,
+                                         cl_mem_flags flags,
+                                         const cl_image_format *image_format,
+                                         size_t image_width,
+                                         size_t image_array_size,
+                                         size_t image_row_pitch,
+                                         size_t image_slice_pitch,
+                                         void *host_ptr,
+                                         cl_int *errcode_ret)
+    {
+        cl_image_desc desc;
+        cl_mem image;
+
+        desc.image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY;
+
+        // Extent of each row and number of rows in the array.
+        desc.image_width = image_width;
+        desc.image_height = 1;
+        desc.image_depth = 1;
+        desc.image_array_size = image_array_size;
+
+        // Host memory layout.
+        desc.image_row_pitch = image_row_pitch;
+        desc.image_slice_pitch = image_slice_pitch;
+
+        desc.num_mip_levels = 0;
+        desc.num_samples = 0;
+        desc.buffer = NULL;
+
+        image = clCreateImage( context, flags, image_format, &desc, host_ptr, errcode_ret );
+
+        // Log an info message and rely on the calling function to produce an
+        // error if necessary.
+        if (errcode_ret != NULL && *errcode_ret != CL_SUCCESS) {
+            log_info("clCreateImage failed (%d)\n", *errcode_ret);
+        }
+
+        return image;
+    }
+
+    // Creates a 1D image through clCreateImage. When `buffer` is non-NULL the
+    // image type is CL_MEM_OBJECT_IMAGE1D_BUFFER and the image is created
+    // from that buffer; otherwise a plain CL_MEM_OBJECT_IMAGE1D is created.
+    //
+    // The remaining parameters are forwarded unchanged. A non-zero status in
+    // *errcode_ret is logged at info level and left for the caller to act on.
+    inline cl_mem create_image_1d (cl_context context,
+                                   cl_mem_flags flags,
+                                   const cl_image_format *image_format,
+                                   size_t image_width,
+                                   size_t image_row_pitch,
+                                   void *host_ptr,
+                                   cl_mem buffer,
+                                   cl_int *errcode_ret)
+    {
+        cl_mem mImage;
+
+        cl_image_desc image_desc;
+        image_desc.image_type = buffer ? CL_MEM_OBJECT_IMAGE1D_BUFFER: CL_MEM_OBJECT_IMAGE1D;
+        image_desc.image_width = image_width;
+        image_desc.image_height = 1;
+        image_desc.image_depth = 1;
+        // Not used for a non-array image, but it must still be given a
+        // defined value: the descriptor is handed to the implementation by
+        // pointer and would otherwise carry indeterminate stack contents.
+        image_desc.image_array_size = 0;
+        image_desc.image_row_pitch = image_row_pitch;
+        image_desc.image_slice_pitch = 0;
+        image_desc.num_mip_levels = 0;
+        image_desc.num_samples = 0;
+        image_desc.buffer = buffer;
+        mImage = clCreateImage( context,
+                               flags,
+                               image_format,
+                               &image_desc,
+                               host_ptr,
+                               errcode_ret );
+        if (errcode_ret && (*errcode_ret)) {
+            // Log an info message and rely on the calling function to produce an error
+            // if necessary.
+            log_info("clCreateImage failed (%d)\n", *errcode_ret);
+        }
+
+        return mImage;
+    }
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- a/test_conformance/compatibility/test_common/harness/compat.h
+++ b/test_conformance/compatibility/test_common/harness/compat.h
@@ -0,0 +1,210 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef _COMPAT_H_
+#define _COMPAT_H_
+
+#if defined(_WIN32) && defined (_MSC_VER)
+
+#include <Windows.h>
+#include <Winbase.h>
+#include <CL/cl.h>
+#include <float.h>
+#include <xmmintrin.h>
+#include <math.h>
+
+// MSVC build path: the first argument x is ignored and the value is rebuilt
+// as y * 2^z via ldexp(). The non-MSVC definitions further down in this
+// header expand to x instead.
+// NOTE(review): x is presumably the hex floating-point literal spelling of
+// the same value, which this compiler path avoids parsing -- confirm.
+#define MAKE_HEX_FLOAT(x,y,z)  ((float)ldexp( (float)(y), z))
+#define MAKE_HEX_DOUBLE(x,y,z) ldexp( (double)(y), z)
+#define MAKE_HEX_LONG(x,y,z)   ((long double) ldexp( (long double)(y), z))
+
+#define isfinite(x) _finite(x)
+
+#if !defined(__cplusplus)
+typedef char bool;
+#define inline
+
+#else
+extern "C" {
+#endif
+
+// Fixed-width integer names for the MSVC build path, which does not include
+// <stdint.h>. int8_t is spelled "signed char" rather than plain "char"
+// because the signedness of plain char is implementation-defined (MSVC's /J
+// switch makes it unsigned), which would silently turn int8_t into an
+// unsigned type.
+typedef unsigned char       uint8_t;
+typedef signed char         int8_t;
+typedef unsigned short      uint16_t;
+typedef short               int16_t;
+typedef unsigned int        uint32_t;
+typedef int                 int32_t;
+typedef unsigned long long  uint64_t;
+typedef long long           int64_t;
+
+#define MAXPATHLEN MAX_PATH
+
+typedef unsigned short ushort;
+typedef unsigned int   uint;
+typedef unsigned long  ulong;
+
+
+#define INFINITY    (FLT_MAX + FLT_MAX)
+//#define NAN (INFINITY | 1)
+//const static int PINFBITPATT_SP32  = INFINITY;
+
+#ifndef M_PI
+    #define M_PI    3.14159265358979323846264338327950288
+#endif
+
+
+#define    isnan( x )       ((x) != (x))
+#define     isinf( _x)      ((_x) == INFINITY || (_x) == -INFINITY)
+
+double rint( double x);
+float  rintf( float x);
+long double rintl( long double x);
+
+float cbrtf( float );
+double cbrt( double );
+
+int    ilogb( double x);
+int    ilogbf (float x);
+int    ilogbl(long double x);
+
+double fmax(double x, double y);
+double fmin(double x, double y);
+float  fmaxf( float x, float y );
+float  fminf(float x, float y);
+
+double      log2(double x);
+long double log2l(long double x);
+
+double      exp2(double x);
+long double exp2l(long double x);
+
+double      fdim(double x, double y);
+float       fdimf(float x, float y);
+long double fdiml(long double x, long double y);
+
+double      remquo( double x, double y, int *quo);
+float       remquof( float x, float y, int *quo);
+long double remquol( long double x, long double y, int *quo);
+
+long double scalblnl(long double x, long n);
+
+// Absolute value of a long long, supplied for the MSVC build path.
+inline long long
+llabs(long long __x)
+{
+    if( __x < 0 )
+        return -__x;
+
+    return __x;
+}
+
+
+// end of math functions
+
+uint64_t ReadTime( void );
+double SubtractTime( uint64_t endTime, uint64_t startTime );
+
+// sleep(seconds) expressed through Sleep(), scaling the argument by 1000.
+// The argument is parenthesised so that an expression such as sleep(a + b)
+// is scaled as a whole instead of expanding to Sleep(1000*a + b).
+#define sleep(X)   Sleep(1000*(X))
+#define snprintf   sprintf_s
+//#define hypotl     _hypot
+
+float   make_nan();
+float nanf( const char* str);
+double  nan( const char* str);
+long double nanl( const char* str);
+
+//#if defined USE_BOOST
+//#include <boost/math/tr1.hpp>
+//double hypot(double x, double y);
+float hypotf(float x, float y);
+long double hypotl(long double x, long double y) ;
+double lgamma(double x);
+float  lgammaf(float x);
+
+double trunc(double x);
+float  truncf(float x);
+
+double log1p(double x);
+float  log1pf(float x);
+long double log1pl(long double x);
+
+double copysign(double x, double y);
+float  copysignf(float x, float y);
+long double copysignl(long double x, long double y);
+
+long lround(double x);
+long lroundf(float x);
+//long lroundl(long double x)
+
+double round(double x);
+float  roundf(float x);
+long double roundl(long double x);
+
+int signbit(double x);
+int signbitf(float x);
+
+//bool signbitl(long double x)         { return boost::math::tr1::signbit<long double>(x); }
+//#endif // USE_BOOST
+
+long int lrint (double flt);
+long int lrintf (float flt);
+
+
+float   int2float (int32_t ix);
+int32_t float2int (float   fx);
+
+/** Returns the number of leading 0-bits in x,
+    starting at the most significant bit position.
+    If x is 0, the result is undefined.
+*/
+int __builtin_clz(unsigned int pattern);
+
+
+static const double zero=  0.00000000000000000000e+00;
+#define NAN  (INFINITY - INFINITY)
+#define HUGE_VALF (float)HUGE_VAL
+
+int usleep(int usec);
+
+// reimplement fenv.h because windows doesn't have it
+#define FE_INEXACT          0x0020
+#define FE_UNDERFLOW        0x0010
+#define FE_OVERFLOW         0x0008
+#define FE_DIVBYZERO        0x0004
+#define FE_INVALID          0x0001
+#define FE_ALL_EXCEPT       0x003D
+
+int fetestexcept(int excepts);
+int feclearexcept(int excepts);
+
+#ifdef __cplusplus
+}
+#endif
+
+#else // !((defined(_WIN32) && defined(_MSC_VER)
+#if defined(__MINGW32__)
+#include <windows.h>
+// sleep(seconds) expressed through Sleep(), scaling the argument by 1000.
+// The argument is parenthesised so that an expression such as sleep(a + b)
+// is scaled as a whole instead of expanding to Sleep(1000*a + b).
+#define sleep(X)   Sleep(1000*(X))
+
+#endif
+#if defined(__linux__) || defined(__MINGW32__) || defined(__APPLE__)
+#ifndef __STDC_LIMIT_MACROS
+#define __STDC_LIMIT_MACROS
+#endif
+#include <fenv.h>
+#include <math.h>
+#include <float.h>
+#include <stdint.h>
+#endif
+#define MAKE_HEX_FLOAT(x,y,z) x
+#define MAKE_HEX_DOUBLE(x,y,z) x
+#define MAKE_HEX_LONG(x,y,z) x
+
+#endif // !((defined(_WIN32) && defined(_MSC_VER)
+
+
+#endif // _COMPAT_H_
--- a/test_conformance/compatibility/test_common/harness/conversions.c
+++ b/test_conformance/compatibility/test_common/harness/conversions.c
--- a/test_conformance/compatibility/test_common/harness/conversions.h
+++ b/test_conformance/compatibility/test_common/harness/conversions.h
@@ -0,0 +1,126 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef _conversions_h
+#define _conversions_h
+
+#include "compat.h"
+
+#include "errorHelpers.h"
+#include "mt19937.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/* Note: the next three all have to match in size and order!! */
+
+// Identifies the element type of a value handled by the helpers declared
+// further down in this header (for example the inType / outType arguments of
+// convert_explicit_value). Enumerators are numbered consecutively from 0.
+enum ExplicitTypes
+{
+    kBool        = 0,
+    kChar,
+    kUChar,
+    kUnsignedChar,      // separate enumerator from kUChar (distinct value)
+    kShort,
+    kUShort,
+    kUnsignedShort,     // separate enumerator from kUShort (distinct value)
+    kInt,
+    kUInt,
+    kUnsignedInt,       // separate enumerator from kUInt (distinct value)
+    kLong,
+    kULong,
+    kUnsignedLong,      // separate enumerator from kULong (distinct value)
+    kFloat,
+    kHalf,
+    kDouble,
+    kNumExplicitTypes   // count of the enumerators above; not itself a type
+};
+
+typedef enum ExplicitTypes    ExplicitType;
+
+// Rounding-mode selector (see the roundType argument of
+// convert_explicit_value). Enumerators are numbered consecutively from 0.
+enum RoundingTypes
+{
+    kRoundToEven = 0,
+    kRoundToZero,
+    kRoundToPosInf,
+    kRoundToNegInf,
+    kRoundToNearest,
+
+    kNumRoundingTypes,  // count of the rounding modes listed above
+
+    kDefaultRoundingType = kRoundToNearest  // alias of kRoundToNearest, not an additional mode
+};
+
+typedef enum RoundingTypes    RoundingType;
+
+extern void             print_type_to_string(ExplicitType type, void *data, char* string);
+extern size_t           get_explicit_type_size( ExplicitType type );
+extern const char *     get_explicit_type_name( ExplicitType type );
+extern void             convert_explicit_value( void *inRaw, void *outRaw, ExplicitType inType, bool saturate, RoundingType roundType, ExplicitType outType );
+
+extern void             generate_random_data( ExplicitType type, size_t count, MTdata d, void *outData );
+extern void    *         create_random_data( ExplicitType type, MTdata d, size_t count );
+
+extern cl_long          read_upscale_signed( void *inRaw, ExplicitType inType );
+extern cl_ulong         read_upscale_unsigned( void *inRaw, ExplicitType inType );
+extern float            read_as_float( void *inRaw, ExplicitType inType );
+
+extern float            get_random_float(float low, float high, MTdata d);
+extern double           get_random_double(double low, double high, MTdata d);
+extern float            any_float( MTdata d );
+extern double           any_double( MTdata d );
+
+extern int              random_in_range( int minV, int maxV, MTdata d );
+
+size_t get_random_size_t(size_t low, size_t high, MTdata d);
+
+// Returns non-zero when x is a subnormal single-precision value and zero
+// otherwise (normal numbers, +/-0, infinities and NaNs).
+static inline int IsFloatSubnormal( float x )
+{
+#if 2 == FLT_RADIX
+    // Classify from the bit pattern instead of comparing floating-point
+    // values, so that flush-to-zero behavior cannot hide a subnormal.
+    union{ float value; uint32_t bits; } pun;
+    uint32_t magnitude;
+
+    pun.value = fabsf(x);
+    magnitude = pun.bits;
+
+    // With the sign cleared, patterns 0x00000001..0x007fffff are accepted.
+    // Subtracting one first wraps zero around to 0xffffffff, so a single
+    // unsigned comparison rejects zero as well.
+    return (magnitude - 1) < 0x007fffffU;
+#else
+    // rely on floating point hardware for non-radix2 non-IEEE-754 hardware -- will fail if you flush subnormals to zero
+    if( x == 0.0 )
+        return 0;
+
+    return fabs(x) < (double) FLT_MIN;
+#endif
+}
+
+// Returns non-zero when x is a subnormal double-precision value and zero
+// otherwise (normal numbers, +/-0, infinities and NaNs).
+static inline int IsDoubleSubnormal( double x )
+{
+#if 2 == FLT_RADIX
+    // Classify from the bit pattern instead of comparing floating-point
+    // values, so that flush-to-zero behavior cannot hide a subnormal.
+    union{ double value; uint64_t bits; } pun;
+    uint64_t magnitude;
+
+    pun.value = fabs( x);
+    magnitude = pun.bits;
+
+    // With the sign cleared, patterns 1..0x000fffffffffffff are accepted.
+    // Subtracting one first wraps zero around to the maximum value, so a
+    // single unsigned comparison rejects zero as well.
+    return (magnitude - 1) < 0x000fffffffffffffULL;
+#else
+    // rely on floating point hardware for non-radix2 non-IEEE-754 hardware -- will fail if you flush subnormals to zero
+    if( x == 0.0 )
+        return 0;
+
+    return fabs(x) < (double) DBL_MIN;
+#endif
+}
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif // _conversions_h
+
+
--- a/test_conformance/compatibility/test_common/harness/errorHelpers.c
+++ b/test_conformance/compatibility/test_common/harness/errorHelpers.c
@@ -0,0 +1,579 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "compat.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "errorHelpers.h"
+
+// Maps an OpenCL status code to the name of its CL_* constant.
+//
+// Returns a pointer to a string literal (never NULL); codes that are not
+// listed yield "(unknown)". The OpenCL 2.0 additions are guarded with #ifdef
+// so the file still builds against headers that do not define them.
+const char    *IGetErrorString( int clErrorCode )
+{
+    switch( clErrorCode )
+    {
+        case CL_SUCCESS:                return "CL_SUCCESS";
+        case CL_DEVICE_NOT_FOUND:        return "CL_DEVICE_NOT_FOUND";
+        case CL_DEVICE_NOT_AVAILABLE:    return "CL_DEVICE_NOT_AVAILABLE";
+        case CL_COMPILER_NOT_AVAILABLE:    return "CL_COMPILER_NOT_AVAILABLE";
+        case CL_MEM_OBJECT_ALLOCATION_FAILURE:    return "CL_MEM_OBJECT_ALLOCATION_FAILURE";
+        case CL_OUT_OF_RESOURCES:        return "CL_OUT_OF_RESOURCES";
+        case CL_OUT_OF_HOST_MEMORY:        return "CL_OUT_OF_HOST_MEMORY";
+        case CL_PROFILING_INFO_NOT_AVAILABLE: return "CL_PROFILING_INFO_NOT_AVAILABLE";
+        case CL_MEM_COPY_OVERLAP:        return "CL_MEM_COPY_OVERLAP";
+        case CL_IMAGE_FORMAT_MISMATCH:    return "CL_IMAGE_FORMAT_MISMATCH";
+        case CL_IMAGE_FORMAT_NOT_SUPPORTED:    return "CL_IMAGE_FORMAT_NOT_SUPPORTED";
+        case CL_BUILD_PROGRAM_FAILURE: return "CL_BUILD_PROGRAM_FAILURE";
+        case CL_MAP_FAILURE:            return "CL_MAP_FAILURE";
+        case CL_MISALIGNED_SUB_BUFFER_OFFSET: return "CL_MISALIGNED_SUB_BUFFER_OFFSET";
+        case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST";
+        case CL_COMPILE_PROGRAM_FAILURE: return "CL_COMPILE_PROGRAM_FAILURE";
+        case CL_LINKER_NOT_AVAILABLE: return "CL_LINKER_NOT_AVAILABLE";
+        case CL_LINK_PROGRAM_FAILURE: return "CL_LINK_PROGRAM_FAILURE";
+        case CL_DEVICE_PARTITION_FAILED: return "CL_DEVICE_PARTITION_FAILED";
+        case CL_KERNEL_ARG_INFO_NOT_AVAILABLE: return "CL_KERNEL_ARG_INFO_NOT_AVAILABLE";
+        case CL_INVALID_VALUE:            return "CL_INVALID_VALUE";
+        case CL_INVALID_DEVICE_TYPE: return "CL_INVALID_DEVICE_TYPE";
+        case CL_INVALID_DEVICE:            return "CL_INVALID_DEVICE";
+        case CL_INVALID_CONTEXT:        return "CL_INVALID_CONTEXT";
+        case CL_INVALID_QUEUE_PROPERTIES:    return "CL_INVALID_QUEUE_PROPERTIES";
+        case CL_INVALID_COMMAND_QUEUE:    return "CL_INVALID_COMMAND_QUEUE";
+        case CL_INVALID_HOST_PTR:    return "CL_INVALID_HOST_PTR";
+        case CL_INVALID_MEM_OBJECT:        return "CL_INVALID_MEM_OBJECT";
+        case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR:        return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR";
+        case CL_INVALID_IMAGE_SIZE:        return "CL_INVALID_IMAGE_SIZE";
+        case CL_INVALID_SAMPLER:        return "CL_INVALID_SAMPLER";
+        case CL_INVALID_BINARY:        return "CL_INVALID_BINARY";
+        case CL_INVALID_BUILD_OPTIONS:        return "CL_INVALID_BUILD_OPTIONS";
+        case CL_INVALID_PROGRAM:        return "CL_INVALID_PROGRAM";
+        case CL_INVALID_PROGRAM_EXECUTABLE:        return "CL_INVALID_PROGRAM_EXECUTABLE";
+        case CL_INVALID_KERNEL_NAME:    return "CL_INVALID_KERNEL_NAME";
+        case CL_INVALID_KERNEL_DEFINITION:    return "CL_INVALID_KERNEL_DEFINITION";
+        case CL_INVALID_KERNEL:            return "CL_INVALID_KERNEL";
+        case CL_INVALID_ARG_INDEX:        return "CL_INVALID_ARG_INDEX";
+        case CL_INVALID_ARG_VALUE:        return "CL_INVALID_ARG_VALUE";
+        case CL_INVALID_ARG_SIZE:        return "CL_INVALID_ARG_SIZE";
+        case CL_INVALID_KERNEL_ARGS:    return "CL_INVALID_KERNEL_ARGS";
+        case CL_INVALID_WORK_DIMENSION:        return "CL_INVALID_WORK_DIMENSION";
+        case CL_INVALID_WORK_GROUP_SIZE:    return "CL_INVALID_WORK_GROUP_SIZE";
+        case CL_INVALID_WORK_ITEM_SIZE:    return "CL_INVALID_WORK_ITEM_SIZE";
+        case CL_INVALID_GLOBAL_OFFSET:        return "CL_INVALID_GLOBAL_OFFSET";
+        case CL_INVALID_EVENT_WAIT_LIST:    return "CL_INVALID_EVENT_WAIT_LIST";
+        case CL_INVALID_EVENT:            return "CL_INVALID_EVENT";
+        case CL_INVALID_OPERATION:        return "CL_INVALID_OPERATION";
+        case CL_INVALID_GL_OBJECT:        return "CL_INVALID_GL_OBJECT";
+        case CL_INVALID_BUFFER_SIZE:    return "CL_INVALID_BUFFER_SIZE";
+        case CL_INVALID_MIP_LEVEL:      return "CL_INVALID_MIP_LEVEL";
+        case CL_INVALID_GLOBAL_WORK_SIZE: return "CL_INVALID_GLOBAL_WORK_SIZE";
+        case CL_INVALID_PROPERTY: return "CL_INVALID_PROPERTY";
+        case CL_INVALID_IMAGE_DESCRIPTOR: return "CL_INVALID_IMAGE_DESCRIPTOR";
+        case CL_INVALID_COMPILER_OPTIONS: return "CL_INVALID_COMPILER_OPTIONS";
+        case CL_INVALID_LINKER_OPTIONS: return "CL_INVALID_LINKER_OPTIONS";
+        case CL_INVALID_DEVICE_PARTITION_COUNT: return "CL_INVALID_DEVICE_PARTITION_COUNT";
+        // Error codes introduced with OpenCL 2.0; only present in newer headers.
+#if defined CL_INVALID_PIPE_SIZE
+        case CL_INVALID_PIPE_SIZE: return "CL_INVALID_PIPE_SIZE";
+#endif
+#if defined CL_INVALID_DEVICE_QUEUE
+        case CL_INVALID_DEVICE_QUEUE: return "CL_INVALID_DEVICE_QUEUE";
+#endif
+        default: return "(unknown)";
+    }
+}
+
+// Returns the name of the CL_* constant for an image channel order, or NULL
+// when the order is not one of those listed here.
+const char *GetChannelOrderName( cl_channel_order order )
+{
+    const char *name = NULL;
+
+    switch( order )
+    {
+        case CL_R:          name = "CL_R";          break;
+        case CL_A:          name = "CL_A";          break;
+        case CL_Rx:         name = "CL_Rx";         break;
+        case CL_RG:         name = "CL_RG";         break;
+        case CL_RA:         name = "CL_RA";         break;
+        case CL_RGx:        name = "CL_RGx";        break;
+        case CL_RGB:        name = "CL_RGB";        break;
+        case CL_RGBx:       name = "CL_RGBx";       break;
+        case CL_RGBA:       name = "CL_RGBA";       break;
+        case CL_ARGB:       name = "CL_ARGB";       break;
+        case CL_BGRA:       name = "CL_BGRA";       break;
+        case CL_INTENSITY:  name = "CL_INTENSITY";  break;
+        case CL_LUMINANCE:  name = "CL_LUMINANCE";  break;
+#if defined CL_1RGB_APPLE
+        case CL_1RGB_APPLE: name = "CL_1RGB_APPLE"; break;
+#endif
+#if defined CL_BGR1_APPLE
+        case CL_BGR1_APPLE: name = "CL_BGR1_APPLE"; break;
+#endif
+        default:
+            break;
+    }
+
+    return name;
+}
+
+// Returns 1 when the channel order is one of those listed here and 0
+// otherwise. The Apple-specific orders are included only when the headers
+// define them.
+int IsChannelOrderSupported( cl_channel_order order )
+{
+    int supported = 0;
+
+    switch( order )
+    {
+        case CL_R:
+        case CL_A:
+        case CL_Rx:
+        case CL_RG:
+        case CL_RA:
+        case CL_RGx:
+        case CL_RGB:
+        case CL_RGBx:
+        case CL_RGBA:
+        case CL_ARGB:
+        case CL_BGRA:
+        case CL_INTENSITY:
+        case CL_LUMINANCE:
+#if defined CL_1RGB_APPLE
+        case CL_1RGB_APPLE:
+#endif
+#if defined CL_BGR1_APPLE
+        case CL_BGR1_APPLE:
+#endif
+            supported = 1;
+            break;
+        default:
+            break;
+    }
+
+    return supported;
+}
+
+// Returns the name of the CL_* constant for an image channel data type, or
+// NULL when the type is not one of those listed here.
+const char *GetChannelTypeName( cl_channel_type type )
+{
+    const char *name = NULL;
+
+    switch( type )
+    {
+        case CL_SNORM_INT8:         name = "CL_SNORM_INT8";         break;
+        case CL_SNORM_INT16:        name = "CL_SNORM_INT16";        break;
+        case CL_UNORM_INT8:         name = "CL_UNORM_INT8";         break;
+        case CL_UNORM_INT16:        name = "CL_UNORM_INT16";        break;
+        case CL_UNORM_SHORT_565:    name = "CL_UNORM_SHORT_565";    break;
+        case CL_UNORM_SHORT_555:    name = "CL_UNORM_SHORT_555";    break;
+        case CL_UNORM_INT_101010:   name = "CL_UNORM_INT_101010";   break;
+        case CL_SIGNED_INT8:        name = "CL_SIGNED_INT8";        break;
+        case CL_SIGNED_INT16:       name = "CL_SIGNED_INT16";       break;
+        case CL_SIGNED_INT32:       name = "CL_SIGNED_INT32";       break;
+        case CL_UNSIGNED_INT8:      name = "CL_UNSIGNED_INT8";      break;
+        case CL_UNSIGNED_INT16:     name = "CL_UNSIGNED_INT16";     break;
+        case CL_UNSIGNED_INT32:     name = "CL_UNSIGNED_INT32";     break;
+        case CL_HALF_FLOAT:         name = "CL_HALF_FLOAT";         break;
+        case CL_FLOAT:              name = "CL_FLOAT";              break;
+#ifdef CL_SFIXED14_APPLE
+        case CL_SFIXED14_APPLE:     name = "CL_SFIXED14_APPLE";     break;
+#endif
+        default:
+            break;
+    }
+
+    return name;
+}
+
+// Returns 1 when the channel data type is one of those listed here and 0
+// otherwise. CL_SFIXED14_APPLE is included only when the headers define it.
+int IsChannelTypeSupported( cl_channel_type type )
+{
+    int supported = 0;
+
+    switch( type )
+    {
+        case CL_SNORM_INT8:
+        case CL_SNORM_INT16:
+        case CL_UNORM_INT8:
+        case CL_UNORM_INT16:
+        case CL_UNORM_SHORT_565:
+        case CL_UNORM_SHORT_555:
+        case CL_UNORM_INT_101010:
+        case CL_SIGNED_INT8:
+        case CL_SIGNED_INT16:
+        case CL_SIGNED_INT32:
+        case CL_UNSIGNED_INT8:
+        case CL_UNSIGNED_INT16:
+        case CL_UNSIGNED_INT32:
+        case CL_HALF_FLOAT:
+        case CL_FLOAT:
+#ifdef CL_SFIXED14_APPLE
+        case CL_SFIXED14_APPLE:
+#endif
+            supported = 1;
+            break;
+        default:
+            break;
+    }
+
+    return supported;
+}
+
+// Returns the name of the CL_ADDRESS_* constant for a sampler addressing
+// mode, or NULL when the mode is not one of those listed here.
+const char *GetAddressModeName( cl_addressing_mode mode )
+{
+    const char *name = NULL;
+
+    switch( mode )
+    {
+        case CL_ADDRESS_NONE:               name = "CL_ADDRESS_NONE";               break;
+        case CL_ADDRESS_CLAMP_TO_EDGE:      name = "CL_ADDRESS_CLAMP_TO_EDGE";      break;
+        case CL_ADDRESS_CLAMP:              name = "CL_ADDRESS_CLAMP";              break;
+        case CL_ADDRESS_REPEAT:             name = "CL_ADDRESS_REPEAT";             break;
+        case CL_ADDRESS_MIRRORED_REPEAT:    name = "CL_ADDRESS_MIRRORED_REPEAT";    break;
+        default:
+            break;
+    }
+
+    return name;
+}
+
+// Translates a device type into its symbolic name.
+// Only exact matches against the four values below are recognized; any other
+// value (including combinations of type bits) yields NULL.
+const char *GetDeviceTypeName( cl_device_type type )
+{
+    if( type == CL_DEVICE_TYPE_GPU )
+        return "CL_DEVICE_TYPE_GPU";
+    if( type == CL_DEVICE_TYPE_CPU )
+        return "CL_DEVICE_TYPE_CPU";
+    if( type == CL_DEVICE_TYPE_ACCELERATOR )
+        return "CL_DEVICE_TYPE_ACCELERATOR";
+    if( type == CL_DEVICE_TYPE_ALL )
+        return "CL_DEVICE_TYPE_ALL";
+
+    return NULL;
+}
+
+// Formats a vector of raw elements as lowercase hex text, e.g. "0201 0403".
+//
+//   dataBuffer - vecSize consecutive elements of typeSize bytes each
+//   typeSize   - size of one element in bytes
+//   vecSize    - number of elements
+//   buffer     - destination, or NULL to use an internal static buffer
+//
+// The bytes of each element are printed from the highest offset down to offset 0,
+// and elements are separated by a single space. Returns the destination buffer.
+//
+// When buffer is NULL the result lives in static storage (not reentrant) and is
+// truncated to the whole elements that fit in that 1024-byte buffer. A caller-supplied
+// buffer has unknown size, so the caller must provide at least
+// vecSize * ( 2 * typeSize + 1 ) bytes.
+const char *GetDataVectorString( void *dataBuffer, size_t typeSize, size_t vecSize, char *buffer )
+{
+    static const char hexDigits[] = "0123456789abcdef";
+    static char scratch[ 1024 ];
+    const unsigned char *src = (const unsigned char *)dataBuffer;
+    size_t capacity = (size_t)-1;   // caller-supplied buffers are trusted to be large enough
+    size_t used = 0;
+    size_t i, j;
+
+    if( buffer == NULL )
+    {
+        buffer = scratch;
+        capacity = sizeof( scratch );
+    }
+
+    for( i = 0; i < vecSize; i++ )
+    {
+        // separator (all but the first element) plus two hex digits per byte
+        size_t needed = 2 * typeSize + ( i > 0 ? 1 : 0 );
+
+        // Stop before an element that would not fit together with the terminating NUL
+        if( needed >= capacity - used )
+            break;
+
+        if( i > 0 )
+            buffer[ used++ ] = ' ';
+
+        for( j = 0; j < typeSize; j++ )
+        {
+            unsigned char byte = src[ typeSize - j - 1 ];
+            buffer[ used++ ] = hexDigits[ byte >> 4 ];
+            buffer[ used++ ] = hexDigits[ byte & 0x0f ];
+        }
+        src += typeSize;
+    }
+    buffer[ used ] = 0;
+
+    return buffer;
+}
+
+// Two-argument maximum. This is a function-like macro, so the larger argument is
+// evaluated twice; do not pass expressions with side effects.
+#ifndef MAX
+#define MAX( _a, _b )       ((_a) > (_b) ? (_a) : (_b))
+#endif
+
+// Under MSVC the scalbn family is routed to the ldexp family.
+// NOTE(review): presumably because older MSVC runtimes lacked scalbn* - confirm.
+#if defined( _MSC_VER )
+#define scalbnf(_a, _i )    ldexpf( _a, _i )
+#define scalbn(_a, _i )     ldexp( _a, _i )
+#define scalbnl(_a, _i )    ldexpl( _a, _i )
+#endif
+
+// Forward declarations of the file-local half-precision helpers.
+static float Ulp_Error_Half_Float( float test, double reference );
+static inline float  half2float( cl_ushort half );
+
+// taken from math tests
+// Half-precision counterparts of FLT_MIN_EXP / FLT_MANT_DIG, used by Ulp_Error_Half_Float.
+#define HALF_MIN_EXP    -13
+#define HALF_MANT_DIG    11
+// Returns the signed difference (test - reference) expressed in units of a
+// half-precision ulp at the reference value.
+//
+//   test      - value under test, already widened to float
+//   reference - reference value in double precision
+//
+// Returns 0 when both values are NaN, or when reference is infinite and test equals it.
+static float Ulp_Error_Half_Float( float test, double reference )
+{
+    // View the reference's bit pattern so the fraction bits can be tested directly
+    union{ double d; uint64_t u; }u;     u.d = reference;
+
+    // Note: This function presumes that someone has already tested whether the result is correctly
+    // rounded before calling this function.  That test:
+    //
+    //    if( (float) reference == test )
+    //        return 0.0f;
+    //
+    // would ensure that cases like fabs(reference) > FLT_MAX are weeded out before we get here.
+    // Otherwise, we'll return inf ulp error here, for what are otherwise correctly rounded
+    // results.
+    // NOTE(review): the note above is worded for the float variant (FLT_MAX); presumably the
+    // half-precision range is what matters here - confirm.
+
+    double testVal = test;
+    // Any of the low 52 bits set: the reference is neither a power of two, a zero, nor an infinity
+    if( u.u & 0x000fffffffffffffULL )
+    { // Non-power of two and NaN
+        if( isnan( reference ) && isnan( test ) )
+            return 0.0f;    // if we are expecting a NaN, any NaN is fine
+
+        // The unbiased exponent of the ulp unit place
+        // (the MAX clamps the exponent at HALF_MIN_EXP-1 for small references)
+        int ulp_exp = HALF_MANT_DIG - 1 - MAX( ilogb( reference), HALF_MIN_EXP-1 );
+
+        // Scale the exponent of the error
+        return (float) scalbn( testVal - reference, ulp_exp );
+    }
+
+    if( isinf( reference ) )
+    {
+        if( (double) test == reference )
+            return 0.0f;
+
+        // Mismatch against an infinite reference: the raw difference is returned unscaled
+        return (float) (testVal - reference );
+    }
+
+    // reference is a normal power of two or a zero
+    // (one is subtracted from the exponent, unlike the branch above)
+    int ulp_exp =  HALF_MANT_DIG - 1 - MAX( ilogb( reference) - 1, HALF_MIN_EXP-1 );
+
+    // Scale the exponent of the error
+    return (float) scalbn( testVal - reference, ulp_exp );
+}
+
+// Taken from vLoadHalf test
+// Widens a 16-bit half-precision bit pattern to the float with the same value.
+// Handles signed zeros, half denormals (renormalized), infinities and NaNs
+// (NaNs come back with the 0x7fc00000 bits set).
+static inline float half2float( cl_ushort us )
+{
+    const uint32_t bits = us;
+    const uint32_t signBit = (bits << 16) & 0x80000000;
+    int32_t exp = (bits & 0x7c00) >> 10;
+    uint32_t frac = (bits & 0x03ff) << 13;
+    union{ unsigned int u; float f;} out;
+
+    // All-ones exponent: infinity when the fraction is zero, NaN otherwise
+    if( exp == 31 )
+    {
+        out.u = frac | signBit | ( frac ? 0x7fc00000 : 0x7f800000 );
+        return out.f;
+    }
+
+    if( exp == 0 )
+    {
+        if( frac == 0 )
+            return signBit ? -0.0f : 0.0f;
+
+        // Half denormal: shift the leading one up to the implicit-bit position,
+        // adjust the exponent to match, then drop the now-implicit bit
+        int shift = __builtin_clz( frac ) - 8;
+        exp -= shift - 1;
+        frac <<= shift;
+        frac &= 0x007fffff;
+    }
+
+    // Rebias from the half exponent bias (15) to the float bias (127) and assemble
+    exp += 127 - 15;
+    out.u = ( (uint32_t)exp << 23 ) | frac | signBit;
+
+    return out.f;
+}
+
+float Ulp_Error_Half( cl_ushort test, float reference )
+{
+    return Ulp_Error_Half_Float( half2float(test), reference );
+}
+
+
+// Returns the signed difference (test - reference) expressed in units of a
+// single-precision ulp at the reference value.
+//
+//   test      - single-precision value under test
+//   reference - reference value in double precision
+//
+// Returns 0 when both values are NaN, or when reference is infinite and test equals it.
+// An infinite test value against a finite reference is scored as if it were 2**128.
+float Ulp_Error( float test, double reference )
+{
+    // View the reference's bit pattern so the fraction bits can be tested directly
+    union{ double d; uint64_t u; }u;     u.d = reference;
+    double testVal = test;
+
+    // Note: This function presumes that someone has already tested whether the result is correctly
+    // rounded before calling this function.  That test:
+    //
+    //    if( (float) reference == test )
+    //        return 0.0f;
+    //
+    // would ensure that cases like fabs(reference) > FLT_MAX are weeded out before we get here.
+    // Otherwise, we'll return inf ulp error here, for what are otherwise correctly rounded
+    // results.
+
+
+    if( isinf( reference ) )
+    {
+        if( testVal == reference )
+            return 0.0f;
+
+        // Mismatch against an infinite reference: the raw difference is returned unscaled
+        return (float) (testVal - reference );
+    }
+
+    if( isinf( testVal) )
+    { // infinite test value, but finite (but possibly overflowing in float) reference.
+      //
+      // The function probably overflowed prematurely here. Formally, the spec says this is
+      // an infinite ulp error and should not be tolerated. Unfortunately, this would mean
+      // that the internal precision of some half_pow implementations would have to be 29+ bits
+      // at half_powr( 0x1.fffffep+31, 4) to correctly determine that 4*log2( 0x1.fffffep+31 )
+      // is not exactly 128.0. You might represent this for example as 4*(32 - ~2**-24), which
+      // after rounding to single is 4*32 = 128, which will ultimately result in premature
+      // overflow, even though a good faith representation would be correct to within 2**-29
+      // internally.
+
+        // In the interest of not requiring the implementation go to extraordinary lengths to
+        // deliver a half precision function, we allow premature overflow within the limit
+        // of the allowed ulp error. Towards that end, we "pretend" the test value is actually
+        // 2**128, the next value that would appear in the number line if float had sufficient range.
+        testVal = copysign( MAKE_HEX_DOUBLE(0x1.0p128, 0x1LL, 128), testVal );
+
+        // Note that the same hack may not work in long double, which is not guaranteed to have
+        // more range than double.  It is not clear that premature overflow should be tolerated for
+        // double.
+    }
+
+    // Any of the low 52 bits set: the reference is neither a power of two nor a zero
+    // (infinities were already handled above)
+    if( u.u & 0x000fffffffffffffULL )
+    { // Non-power of two and NaN
+        if( isnan( reference ) && isnan( test ) )
+            return 0.0f;    // if we are expecting a NaN, any NaN is fine
+
+        // The unbiased exponent of the ulp unit place
+        // (the MAX clamps the exponent at FLT_MIN_EXP-1 for small references)
+        int ulp_exp = FLT_MANT_DIG - 1 - MAX( ilogb( reference), FLT_MIN_EXP-1 );
+
+        // Scale the exponent of the error
+        return (float) scalbn( testVal - reference, ulp_exp );
+    }
+
+    // reference is a normal power of two or a zero
+    // The unbiased exponent of the ulp unit place
+    // (one is subtracted from the exponent, unlike the branch above)
+    int ulp_exp =  FLT_MANT_DIG - 1 - MAX( ilogb( reference) - 1, FLT_MIN_EXP-1 );
+
+    // Scale the exponent of the error
+    return (float) scalbn( testVal - reference, ulp_exp );
+}
+
+// Returns the signed difference (test - reference) expressed in units of a
+// double-precision ulp at the reference value.
+//
+//   test      - double-precision value under test
+//   reference - reference value in long double
+//
+// Returns 0 when both values are NaN, or when reference is infinite and test equals it.
+// When long double is no wider than double, half an ulp (signed like the result) is
+// added to the reported error.
+float Ulp_Error_Double( double test, long double reference )
+{
+  // Deal with long double = double
+  // On most systems long double is a higher precision type than double. They provide either
+  // a 80-bit or greater floating point type, or they provide a head-tail double double format.
+  // That is sufficient to represent the accuracy of a floating point result to many more bits
+  // than double and we can calculate sub-ulp errors. This is the standard system for which this
+  // test suite is designed.
+  //
+  // On some systems double and long double are the same thing. Then we run into a problem,
+  // because our representation of the infinitely precise result (passed in as reference above)
+  // can be off by as much as a half double precision ulp itself.  In this case, we inflate the
+  // reported error by half an ulp to take this into account.  A more correct and permanent fix
+  // would be to undertake refactoring the reference code to return results in this format:
+  //
+  //    typedef struct DoubleReference
+  //    { // true value = correctlyRoundedResult + ulps * ulp(correctlyRoundedResult)        (infinitely precise)
+  //        double  correctlyRoundedResult;     // as best we can
+  //        double  ulps;                       // plus a fractional amount to account for the difference
+  //    }DoubleReference;                       //     between infinitely precise result and correctlyRoundedResult, in units of ulps.
+  //
+  // This would provide a useful higher-than-double precision format for everyone that we can use,
+  // and would solve a few problems with representing absolute errors below DBL_MIN and over DBL_MAX for systems
+  // that use a head to tail double double for long double.
+
+    // Note: This function presumes that someone has already tested whether the result is correctly
+    // rounded before calling this function.  That test:
+    //
+    //    if( (double) reference == test )
+    //        return 0.0f;
+    //
+    // would ensure that cases like fabs(reference) > DBL_MAX are weeded out before we get here.
+    // Otherwise, we'll return inf ulp error here, for what are otherwise correctly rounded
+    // results.
+
+
+    int x;
+    long double testVal = test;
+    // frexpl yields exactly 0.5 only for positive powers of two, so everything else
+    // (including zero, infinities and NaN) takes this branch.
+    // NOTE(review): negative powers of two yield -0.5 and therefore also land here rather
+    // than in the power-of-two path below - confirm this is intended.
+    if( 0.5L != frexpl( reference, &x) )
+    { // Non-power of two and NaN
+        if( isinf( reference ) )
+        {
+            if( testVal == reference )
+                return 0.0f;
+
+            // Mismatch against an infinite reference: the raw difference is returned unscaled
+            return (float) ( testVal - reference );
+        }
+
+        if( isnan( reference ) && isnan( test ) )
+            return 0.0f;    // if we are expecting a NaN, any NaN is fine
+
+        // The unbiased exponent of the ulp unit place
+        // (the MAX clamps the exponent at DBL_MIN_EXP-1 for small references)
+        int ulp_exp = DBL_MANT_DIG - 1 - MAX( ilogbl( reference), DBL_MIN_EXP-1 );
+
+        // Scale the exponent of the error
+        float result = (float) scalbnl( testVal - reference, ulp_exp );
+
+        // account for rounding error in reference result on systems that do not have a higher precision floating point type (see above)
+        if( sizeof(long double) == sizeof( double ) )
+            result += copysignf( 0.5f, result);
+
+        return result;
+
+    }
+
+    // reference is a normal power of two or a zero
+    // The unbiased exponent of the ulp unit place
+    // (one is subtracted from the exponent, unlike the branch above)
+    int ulp_exp =  DBL_MANT_DIG - 1 - MAX( ilogbl( reference) - 1, DBL_MIN_EXP-1 );
+
+    // Scale the exponent of the error
+    float result = (float) scalbnl( testVal - reference, ulp_exp );
+
+    // account for rounding error in reference result on systems that do not have a higher precision floating point type (see above)
+    if( sizeof(long double) == sizeof( double ) )
+        result += copysignf( 0.5f, result);
+
+    return result;
+}
+
+// Logs the build status and build log of one device if its build did not succeed.
+// Returns CL_SUCCESS, or the error that prevented the log from being retrieved.
+static cl_int OutputDeviceBuildLog(cl_program program, cl_device_id device)
+{
+  cl_build_status build_status;
+  size_t log_size = 0;
+  char *build_log;
+  cl_int error;
+
+  error = clGetProgramBuildInfo(program,
+                                device,
+                                CL_PROGRAM_BUILD_STATUS,
+                                sizeof(build_status),
+                                &build_status,
+                                NULL);
+  test_error( error, "Unable to query build status" );
+
+  // Only builds that did not succeed are reported
+  if (build_status == CL_BUILD_SUCCESS)
+    return CL_SUCCESS;
+
+  log_error("ERROR: CL_PROGRAM_BUILD_STATUS=%d\n", (int) build_status);
+  error = clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
+  test_error( error, "Unable to query build log size" );
+
+  // One spare byte guarantees the text is terminated before it reaches the "%s" below
+  build_log = (char *) malloc(log_size + 1);
+  if (build_log == NULL) {
+    print_error( CL_OUT_OF_HOST_MEMORY, "malloc failed" );
+    return CL_OUT_OF_HOST_MEMORY;
+  }
+
+  error = clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, log_size, build_log, NULL);
+  if (error != CL_SUCCESS) {
+    print_error( error, "Unable to query build log" );
+    free(build_log);
+    return error;
+  }
+  build_log[log_size] = '\0';
+
+  log_error("ERROR: CL_PROGRAM_BUILD_LOG:\n%s\n", build_log);
+  free(build_log);
+
+  return CL_SUCCESS;
+}
+
+// Logs the build status and build log of every device whose build of `program`
+// did not succeed.
+//
+//   program     - program to inspect; NULL is accepted and reports nothing
+//   num_devices - number of entries in device_list, or 0 to use every device of the
+//                 program's context (device_list is then ignored)
+//   device_list - devices to inspect when num_devices is non-zero
+//
+// Returns CL_SUCCESS, the first OpenCL error encountered, or CL_OUT_OF_HOST_MEMORY.
+cl_int OutputBuildLogs(cl_program program, cl_uint num_devices, cl_device_id *device_list)
+{
+  cl_int error = CL_SUCCESS;
+  size_t size_ret = 0;
+  cl_device_id *owned_list = NULL;   // non-NULL only when the list was allocated here
+  cl_uint i;
+
+  // Does the program object exist?
+  if (program == NULL)
+    return CL_SUCCESS;
+
+  // If zero devices were specified then allocate and query the device list from the context
+  if (num_devices == 0) {
+    cl_context context;
+
+    error = clGetProgramInfo(program, CL_PROGRAM_CONTEXT, sizeof(context), &context, NULL);
+    test_error( error, "Unable to query program's context" );
+    error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &size_ret);
+    test_error( error, "Unable to query context's device size" );
+
+    num_devices = (cl_uint)( size_ret / sizeof(cl_device_id) );
+    if (num_devices == 0)
+      return CL_SUCCESS;
+
+    owned_list = (cl_device_id *) malloc(size_ret);
+    if (owned_list == NULL) {
+      print_error( CL_OUT_OF_HOST_MEMORY, "malloc failed" );
+      return CL_OUT_OF_HOST_MEMORY;
+    }
+
+    error = clGetContextInfo(context, CL_CONTEXT_DEVICES, size_ret, owned_list, NULL);
+    if (error != CL_SUCCESS) {
+      print_error( error, "Unable to query context's devices" );
+      free(owned_list);
+      return error;
+    }
+    device_list = owned_list;
+  }
+
+  // Report each device in turn, stopping at the first failure
+  for (i = 0; i < num_devices && error == CL_SUCCESS; i++)
+    error = OutputDeviceBuildLog(program, device_list[i]);
+
+  // Release the list only if it was allocated above; a caller-supplied list is untouched
+  free(owned_list);
+
+  return error;
+}
--- a/test_conformance/compatibility/test_common/harness/errorHelpers.h
+++ b/test_conformance/compatibility/test_common/harness/errorHelpers.h
@@ -0,0 +1,149 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef _errorHelpers_h
+#define _errorHelpers_h
+
+#ifdef __APPLE__
+#include <OpenCL/opencl.h>
+#else
+#include <CL/opencl.h>
+#endif
+#include <stdlib.h>
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Values for the _higherBetter argument of log_perf / vlog_perf
+#define LOWER_IS_BETTER     0
+#define HIGHER_IS_BETTER    1
+
+// If USE_ATF is defined, all log_error and log_info calls can be routed to test library
+// functions as described below. This is helpful for integration into an automated testing
+// system.
+#if USE_ATF
+// export BUILD_WITH_ATF=1
+    #include <ATF/ATF.h>
+    #define test_start() ATFTestStart()
+    #define log_info ATFLogInfo
+    #define log_error ATFLogError
+    #define log_perf(_number, _higherBetter, _numType, _format, ...) ATFLogPerformanceNumber(_number, _higherBetter, _numType, _format, ##__VA_ARGS__)
+    #define test_finish() ATFTestFinish()
+    #define vlog_perf(_number, _higherBetter, _numType, _format, ...) ATFLogPerformanceNumber(_number, _higherBetter, _numType, _format,##__VA_ARGS__)
+    #define vlog ATFLogInfo
+    #define vlog_error ATFLogError
+#else
+    // Without ATF everything goes to printf; test_start / test_finish expand to nothing.
+    #define test_start()
+    #define log_info printf
+    #define log_error printf
+    // _format must be a string literal: it is pasted between two other literals.
+    #define log_perf(_number, _higherBetter, _numType, _format, ...) printf("Performance Number " _format " (in %s, %s): %g\n",##__VA_ARGS__, _numType,        \
+                        _higherBetter?"higher is better":"lower is better", _number )
+    #define test_finish()
+    #define vlog_perf(_number, _higherBetter, _numType, _format, ...) printf("Performance Number " _format " (in %s, %s): %g\n",##__VA_ARGS__, _numType,    \
+                        _higherBetter?"higher is better":"lower is better" , _number)
+    #ifdef _WIN32
+        #ifdef __MINGW32__
+            // Use __mingw_printf since it supports "%a" format specifier
+            #define vlog __mingw_printf
+            #define vlog_error __mingw_printf
+        #else
+            // Use home-baked function that treats "%a" as "%f"
+        static int vlog_win32(const char *format, ...);
+        #define vlog vlog_win32
+        #define vlog_error vlog_win32
+        #endif
+    #else
+        #define vlog_error printf
+        #define vlog printf
+    #endif
+#endif
+
+#define ct_assert(b)          ct_assert_i(b, __LINE__)
+#define ct_assert_i(b, line)  ct_assert_ii(b, line)
+#define ct_assert_ii(b, line) int _compile_time_assertion_on_line_##line[b ? 1 : -1];
+
+#define test_error(errCode,msg)    test_error_ret(errCode,msg,errCode)
+#define test_error_ret(errCode,msg,retValue)    { if( errCode != CL_SUCCESS ) { print_error( errCode, msg ); return retValue ; } }
+#define print_error(errCode,msg)    log_error( "ERROR: %s! (%s from %s:%d)\n", msg, IGetErrorString( errCode ), __FILE__, __LINE__ );
+
+// expected error code vs. what we got
+#define test_failure_error(errCode, expectedErrCode, msg) test_failure_error_ret(errCode, expectedErrCode, msg, errCode != expectedErrCode)
+#define test_failure_error_ret(errCode, expectedErrCode, msg, retValue) { if( errCode != expectedErrCode ) { print_failure_error( errCode, expectedErrCode, msg ); return retValue ; } }
+#define print_failure_error(errCode, expectedErrCode, msg) log_error( "ERROR: %s! (Got %s, expected %s from %s:%d)\n", msg, IGetErrorString( errCode ), IGetErrorString( expectedErrCode ), __FILE__, __LINE__ );
+#define test_failure_warning(errCode, expectedErrCode, msg) test_failure_warning_ret(errCode, expectedErrCode, msg, errCode != expectedErrCode)
+#define test_failure_warning_ret(errCode, expectedErrCode, msg, retValue) { if( errCode != expectedErrCode ) { print_failure_warning( errCode, expectedErrCode, msg ); warnings++ ; } }
+#define print_failure_warning(errCode, expectedErrCode, msg) log_error( "WARNING: %s! (Got %s, expected %s from %s:%d)\n", msg, IGetErrorString( errCode ), IGetErrorString( expectedErrCode ), __FILE__, __LINE__ );
+
+// Returns a printable string for an OpenCL error code (used by the print_* macros above).
+extern const char    *IGetErrorString( int clErrorCode );
+
+// Signed error of `test` against `reference`, in ulps of the half / float / double format
+// respectively. Ulp_Error_Half takes the half value as its raw 16-bit pattern.
+extern float Ulp_Error_Half( cl_ushort test, float reference );
+extern float Ulp_Error( float test, double reference );
+extern float Ulp_Error_Double( double test, long double reference );
+
+// Name lookups return NULL for values they do not recognize; the Is*Supported
+// predicates return 1 or 0.
+// NOTE(review): the channel-order pair is assumed to follow the same convention as the
+// channel-type pair - confirm against their definitions.
+extern const char *GetChannelTypeName( cl_channel_type type );
+extern int IsChannelTypeSupported( cl_channel_type type );
+extern const char *GetChannelOrderName( cl_channel_order order );
+extern int IsChannelOrderSupported( cl_channel_order order );
+extern const char *GetAddressModeName( cl_addressing_mode mode );
+
+extern const char *GetDeviceTypeName( cl_device_type type );
+
+// Formats vecSize elements of typeSize bytes each as space-separated hex text.
+// NON-REENTRANT UNLESS YOU PROVIDE A BUFFER PTR (pass null to use static storage, but it's not reentrant then!)
+extern const char *GetDataVectorString( void *dataBuffer, size_t typeSize, size_t vecSize, char *buffer );
+
+#if defined (_WIN32) && !defined(__MINGW32__)
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+// printf stand-in for toolchains whose printf lacks "%a": every "%a" in the format
+// is rewritten to "%f" in a private copy before printing.
+// Returns 0 on success, -1 if the copy of the format string cannot be allocated.
+static int vlog_win32(const char *format, ...)
+{
+    char *patched = NULL;
+    va_list args;
+
+    if (strstr(format, "%a") != NULL) {
+        char *cursor;
+
+        patched = strdup(format);
+        if (patched == NULL) {
+            printf("vlog_win32: Failed to allocate memory for strdup\n");
+            return -1;
+        }
+
+        // replace %a with %f
+        for (cursor = patched; *cursor != '\0'; cursor++) {
+            if (cursor[0] == '%' && cursor[1] == 'a')
+                cursor[1] = 'f';
+        }
+    }
+
+    va_start(args, format);
+    vprintf(patched != NULL ? patched : format, args);
+    va_end(args);
+
+    // patched is NULL when the caller's format was used as-is
+    free(patched);
+
+    return 0;
+}
+#endif
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // _errorHelpers_h
+
+
--- a/test_conformance/compatibility/test_common/harness/fpcontrol.h
+++ b/test_conformance/compatibility/test_common/harness/fpcontrol.h
@@ -0,0 +1,104 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef _fpcontrol_h
+#define _fpcontrol_h
+
+// In order to get tests for correctly rounded operations (e.g. multiply) to work properly we need to be able to set the reference hardware
+// to FTZ mode if the device hardware is running in that mode.  We have explored all other options short of writing correctly rounded operations
+// in integer code, and have found this is the only way to correctly verify operation.
+//
+// Non-Apple implementations will need to provide their own implementation of these features.  If the reference hardware and device are both
+// running in the same state (either FTZ or IEEE compliant modes) then these functions may be empty.  If the device is running in a non-default
+// rounding mode (e.g. round toward zero), then these functions should also set the reference device into that rounding mode.
+#if defined( __APPLE__ ) || defined( _MSC_VER ) || defined( __linux__ ) || defined (__MINGW32__)
+    typedef int     FPU_mode_type;
+#if defined( __i386__ ) || defined( __x86_64__ )
+    #include <xmmintrin.h>
+#elif defined( __PPC__ )
+    #include <fpu_control.h>
+    extern __thread fpu_control_t fpu_control;
+#endif
+    // Set the reference hardware floating point unit to FTZ mode
+    //
+    // The control state in effect before the change is stored in *mode so it can
+    // later be handed to RestoreFPState().
+    static inline void ForceFTZ( FPU_mode_type *mode )
+    {
+#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__)
+        // NOTE(review): 0x8040 is presumably the FTZ (0x8000) and DAZ (0x0040) bits of MXCSR - confirm.
+        // NOTE(review): <xmmintrin.h> is only included above for __i386__ / __x86_64__; an MSVC
+        // build presumably gets _mm_getcsr / _mm_setcsr from another include - confirm.
+        *mode = _mm_getcsr();
+        _mm_setcsr( *mode | 0x8040);
+#elif defined( __PPC__ )
+        *mode = fpu_control;
+        fpu_control |= _FPU_MASK_NI;
+#elif defined ( __arm__ )
+        // Read FPSCR, remember it, then write it back with bit 24 set
+        // NOTE(review): bit 24 is presumably the flush-to-zero control bit - confirm.
+        unsigned fpscr;
+        __asm__ volatile ("fmrx %0, fpscr" : "=r"(fpscr));
+        *mode = fpscr;
+        __asm__ volatile ("fmxr fpscr, %0" :: "r"(fpscr | (1U << 24)));
+        // Add 64 bit support
+#elif defined (__aarch64__)
+        // Same sequence as the 32-bit ARM branch, using the FPCR system register
+        unsigned fpscr;
+        __asm__ volatile ("mrs %0, fpcr" : "=r"(fpscr));
+        *mode = fpscr;
+        __asm__ volatile ("msr fpcr, %0" :: "r"(fpscr | (1U << 24)));
+#else
+        #error ForceFTZ needs an implentation
+#endif
+    }
+
+    // Disable the denorm flush to zero
+    //
+    // Mirror image of ForceFTZ(): clears the bits that ForceFTZ() sets. The control state
+    // in effect before the change is stored in *mode for RestoreFPState().
+    static inline void DisableFTZ( FPU_mode_type *mode )
+    {
+#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__)
+        // NOTE(review): 0x8040 is presumably the FTZ (0x8000) and DAZ (0x0040) bits of MXCSR - confirm.
+        *mode = _mm_getcsr();
+        _mm_setcsr( *mode & ~0x8040);
+#elif defined( __PPC__ )
+        *mode = fpu_control;
+        fpu_control &= ~_FPU_MASK_NI;
+#elif defined ( __arm__ )
+        // Read FPSCR, remember it, then write it back with bit 24 cleared
+        unsigned fpscr;
+        __asm__ volatile ("fmrx %0, fpscr" : "=r"(fpscr));
+        *mode = fpscr;
+        __asm__ volatile ("fmxr fpscr, %0" :: "r"(fpscr & ~(1U << 24)));
+        // Add 64 bit support
+#elif defined (__aarch64__)
+        // Same sequence as the 32-bit ARM branch, using the FPCR system register
+        unsigned fpscr;
+        __asm__ volatile ("mrs %0, fpcr" : "=r"(fpscr));
+        *mode = fpscr;
+        __asm__ volatile ("msr fpcr, %0" :: "r"(fpscr & ~(1U << 24)));
+#else
+#error DisableFTZ needs an implentation
+#endif
+    }
+
+    // Restore the reference hardware to floating point state indicated by *mode
+    //
+    // *mode is expected to hold a value previously stored by ForceFTZ() or DisableFTZ();
+    // it is written back to the same control register / variable those functions read.
+    static inline void RestoreFPState( FPU_mode_type *mode )
+    {
+#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__)
+        _mm_setcsr( *mode );
+#elif defined( __PPC__)
+        fpu_control = *mode;
+#elif defined (__arm__)
+        __asm__ volatile ("fmxr fpscr, %0" :: "r"(*mode));
+        // Add 64 bit support
+#elif defined (__aarch64__)
+        __asm__ volatile ("msr fpcr, %0" :: "r"(*mode));
+#else
+        #error RestoreFPState needs an implementation
+#endif
+    }
+#else
+        #error ForceFTZ and RestoreFPState need implentations
+#endif
+
+#endif
--- a/test_conformance/compatibility/test_common/harness/genericThread.cpp
+++ b/test_conformance/compatibility/test_common/harness/genericThread.cpp
@@ -0,0 +1,53 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "genericThread.h"
+
+#if defined(_WIN32)
+#include <windows.h>
+#else // !_WIN32
+#include <pthread.h>
+#endif
+
+// Thread entry point handed to the OS: `data` is the genericThread object passed by
+// Start(); the call is forwarded to that object's IRun() and its result is returned.
+void * genericThread::IStaticReflector( void * data )
+{
+    genericThread *self = static_cast<genericThread *>( data );
+    void *result = self->IRun();
+    return result;
+}
+
+// Launches a new thread that runs this object's IRun().
+// Returns true if the thread was created, false otherwise.
+bool genericThread::Start( void )
+{
+#if defined(_WIN32)
+    LPTHREAD_START_ROUTINE entry = (LPTHREAD_START_ROUTINE) IStaticReflector;
+
+    mHandle = CreateThread( NULL, 0, entry, this, 0, NULL );
+    return ( mHandle != NULL );
+#else // !_WIN32
+    // The pthread_t is stored directly in the storage of mHandle
+    pthread_t *threadId = (pthread_t*)&mHandle;
+    const int status = pthread_create( threadId, NULL, IStaticReflector, (void *)this );
+
+    return ( status == 0 );
+#endif // !_WIN32
+}
+
+// Blocks until the thread launched by Start() has finished.
+//
+// On Windows the return value is always NULL, and the thread handle is closed once the
+// wait completes so it is not leaked. Elsewhere the value returned by IRun() is passed
+// back, or NULL if pthread_join() fails.
+void * genericThread::Join( void )
+{
+#if defined(_WIN32)
+    // Start() leaves mHandle NULL when CreateThread fails; there is nothing to wait for then
+    if( mHandle != NULL )
+    {
+        WaitForSingleObject( (HANDLE)mHandle, INFINITE );
+        CloseHandle( (HANDLE)mHandle );
+        mHandle = NULL;
+    }
+    return NULL;
+#else // !_WIN32
+    void * retVal = NULL;
+    int error = pthread_join( (pthread_t)mHandle, &retVal );
+    if( error != 0 )
+        retVal = NULL;
+    return retVal;
+#endif // !_WIN32
+}
--- a/test_conformance/compatibility/test_common/harness/genericThread.h
+++ b/test_conformance/compatibility/test_common/harness/genericThread.h
@@ -0,0 +1,42 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef _genericThread_h
+#define _genericThread_h
+
+#include <stdio.h>
+
+// Minimal thread wrapper: a subclass implements IRun(), and callers use Start() to
+// launch it on a new thread and Join() to wait for it to finish.
+class genericThread
+{
+    public:
+
+        // mHandle starts out NULL so it never holds an indeterminate value
+        genericThread() : mHandle( NULL ) {}
+        virtual ~genericThread() {}
+
+        // Creates the thread; returns true on success
+        bool    Start( void );
+        // Waits for the thread to finish
+        void *    Join( void );
+
+    protected:
+
+        // Thread body, supplied by the subclass
+        virtual void *    IRun( void ) = 0;
+
+    private:
+
+        // Native thread handle (Win32 HANDLE, or a pthread_t stored in place)
+        void* mHandle;
+
+        // Entry point passed to the OS thread API; forwards to IRun()
+        static void * IStaticReflector( void * data );
+};
+
+#endif // _genericThread_h
+
--- a/test_conformance/compatibility/test_common/harness/imageHelpers.cpp
+++ b/test_conformance/compatibility/test_common/harness/imageHelpers.cpp
@@ -0,0 +1,249 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "imageHelpers.h"
+
+// Size in bytes of the channel data type used by an image format
+// (see get_channel_data_type_size).
+size_t get_format_type_size( const cl_image_format *format )
+{
+    const cl_channel_type dataType = format->image_channel_data_type;
+
+    return get_channel_data_type_size( dataType );
+}
+
+// Size in bytes associated with a channel data type; 0 for an unrecognized type.
+// For the packed types (565, 555, 101010) this is the size of the whole packed value.
+size_t get_channel_data_type_size( cl_channel_type channelType )
+{
+    size_t bytes = 0;
+
+    switch( channelType )
+    {
+        case CL_SNORM_INT8:
+        case CL_UNORM_INT8:
+        case CL_SIGNED_INT8:
+        case CL_UNSIGNED_INT8:
+            bytes = 1;
+            break;
+
+        case CL_SNORM_INT16:
+        case CL_UNORM_INT16:
+        case CL_SIGNED_INT16:
+        case CL_UNSIGNED_INT16:
+        case CL_HALF_FLOAT:
+#ifdef CL_SFIXED14_APPLE
+        case CL_SFIXED14_APPLE:
+#endif
+            bytes = sizeof( cl_short );
+            break;
+
+        case CL_SIGNED_INT32:
+        case CL_UNSIGNED_INT32:
+            bytes = sizeof( cl_int );
+            break;
+
+        case CL_UNORM_SHORT_565:
+        case CL_UNORM_SHORT_555:
+#ifdef OBSOLETE_FORAMT
+        case CL_UNORM_SHORT_565_REV:
+        case CL_UNORM_SHORT_555_REV:
+#endif
+            bytes = 2;
+            break;
+
+#ifdef OBSOLETE_FORAMT
+        case CL_UNORM_INT_8888:
+        case CL_UNORM_INT_8888_REV:
+            bytes = 4;
+            break;
+#endif
+
+        case CL_UNORM_INT_101010:
+#ifdef OBSOLETE_FORAMT
+        case CL_UNORM_INT_101010_REV:
+#endif
+            bytes = 4;
+            break;
+
+        case CL_FLOAT:
+            bytes = sizeof( cl_float );
+            break;
+
+        default:
+            break;
+    }
+
+    return bytes;
+}
+
+// Number of channels in an image format's channel order
+// (see get_channel_order_channel_count).
+size_t get_format_channel_count( const cl_image_format *format )
+{
+    const cl_channel_order order = format->image_channel_order;
+
+    return get_channel_order_channel_count( order );
+}
+
+// Number of channels associated with a channel order; 0 for an unrecognized order.
+size_t get_channel_order_channel_count( cl_channel_order order )
+{
+    size_t channels = 0;
+
+    switch( order )
+    {
+        case CL_R:
+        case CL_A:
+        case CL_Rx:
+        case CL_INTENSITY:
+        case CL_LUMINANCE:
+            channels = 1;
+            break;
+
+        case CL_RG:
+        case CL_RA:
+        case CL_RGx:
+            channels = 2;
+            break;
+
+        case CL_RGB:
+        case CL_RGBx:
+            channels = 3;
+            break;
+
+        case CL_RGBA:
+        case CL_ARGB:
+        case CL_BGRA:
+#ifdef CL_1RGB_APPLE
+        case CL_1RGB_APPLE:
+#endif
+#ifdef CL_BGR1_APPLE
+        case CL_BGR1_APPLE:
+#endif
+            channels = 4;
+            break;
+
+        default:
+            break;
+    }
+
+    return channels;
+}
+
+// Returns 1 when the format's channel data type is one of the signed types listed
+// below (signed normalized / signed integer / half / float), 0 otherwise.
+int is_format_signed( const cl_image_format *format )
+{
+    int isSigned = 0;
+
+    switch( format->image_channel_data_type )
+    {
+        case CL_SNORM_INT8:
+        case CL_SIGNED_INT8:
+        case CL_SNORM_INT16:
+        case CL_SIGNED_INT16:
+        case CL_SIGNED_INT32:
+        case CL_HALF_FLOAT:
+        case CL_FLOAT:
+#ifdef CL_SFIXED14_APPLE
+        case CL_SFIXED14_APPLE:
+#endif
+            isSigned = 1;
+            break;
+
+        default:
+            break;
+    }
+
+    return isSigned;
+}
+
+// Size in bytes of one pixel of the given image format; 0 for an unrecognized data type.
+// Packed data types have a fixed pixel size; for every other type the size is the
+// channel count multiplied by the size of one channel.
+size_t get_pixel_size( cl_image_format *format )
+{
+  size_t bytesPerChannel;
+
+  switch( format->image_channel_data_type )
+  {
+    case CL_SNORM_INT8:
+    case CL_UNORM_INT8:
+    case CL_SIGNED_INT8:
+    case CL_UNSIGNED_INT8:
+      bytesPerChannel = 1;
+      break;
+
+    case CL_SNORM_INT16:
+    case CL_UNORM_INT16:
+    case CL_SIGNED_INT16:
+    case CL_UNSIGNED_INT16:
+    case CL_HALF_FLOAT:
+#ifdef  CL_SFIXED14_APPLE
+    case CL_SFIXED14_APPLE:
+#endif
+      bytesPerChannel = sizeof( cl_ushort );
+      break;
+
+    case CL_SIGNED_INT32:
+    case CL_UNSIGNED_INT32:
+      bytesPerChannel = sizeof( cl_int );
+      break;
+
+    case CL_FLOAT:
+      bytesPerChannel = sizeof( cl_float );
+      break;
+
+    // Packed types: the whole pixel has a fixed size
+    case CL_UNORM_SHORT_565:
+    case CL_UNORM_SHORT_555:
+#ifdef OBSOLETE_FORAMT
+    case CL_UNORM_SHORT_565_REV:
+    case CL_UNORM_SHORT_555_REV:
+#endif
+      return 2;
+
+#ifdef OBSOLETE_FORAMT
+    case CL_UNORM_INT_8888:
+    case CL_UNORM_INT_8888_REV:
+      return 4;
+#endif
+
+    case CL_UNORM_INT_101010:
+#ifdef OBSOLETE_FORAMT
+    case CL_UNORM_INT_101010_REV:
+#endif
+      return 4;
+
+    default:
+      return 0;
+  }
+
+  return get_format_channel_count( format ) * bytesPerChannel;
+}
+
+// Finds a supported image format whose channel data type is 8 bits wide
+// (CL_SNORM_INT8, CL_UNORM_INT8, CL_SIGNED_INT8 or CL_UNSIGNED_INT8).
+//
+//   context, objType, flags - passed to clGetSupportedImageFormats
+//   channelCount            - required number of channels, or 0 to accept any
+//   outFormat               - receives the first matching format
+//
+// Returns 0 on success, the OpenCL error if the format query fails, or -1 if no
+// matching format is among the (at most 128) formats examined.
+int get_8_bit_image_format( cl_context context, cl_mem_object_type objType, cl_mem_flags flags, size_t channelCount, cl_image_format *outFormat )
+{
+    cl_image_format formatList[ 128 ];
+    const unsigned int maxFormats = (unsigned int)( sizeof( formatList ) / sizeof( formatList[ 0 ] ) );
+    unsigned int outFormatCount = 0, i;
+    int error;
+
+    /* Query the formats supported for this context / flags / object type */
+    error = clGetSupportedImageFormats( context, flags, objType, maxFormats, formatList, &outFormatCount );
+    if( error )
+        return error;
+
+    /* The reported count is the total number supported, which may exceed what fits in formatList */
+    if( outFormatCount > maxFormats )
+        outFormatCount = maxFormats;
+
+    /* Look for one that is an 8-bit format */
+    for( i = 0; i < outFormatCount; i++ )
+    {
+        const cl_channel_type dataType = formatList[ i ].image_channel_data_type;
+
+        if( dataType != CL_SNORM_INT8 && dataType != CL_UNORM_INT8 &&
+            dataType != CL_SIGNED_INT8 && dataType != CL_UNSIGNED_INT8 )
+            continue;
+
+        if( channelCount == 0 || get_format_channel_count( &formatList[ i ] ) == channelCount )
+        {
+            *outFormat = formatList[ i ];
+            return 0;
+        }
+    }
+
+    return -1;
+}
+
+// Finds a supported image format whose channel data type is one of
+// CL_UNORM_INT_101010, CL_FLOAT, CL_SIGNED_INT32 or CL_UNSIGNED_INT32.
+//
+//   context, objType, flags - passed to clGetSupportedImageFormats
+//   channelCount            - required number of channels, or 0 to accept any
+//   outFormat               - receives the first matching format
+//
+// Returns 0 on success, the OpenCL error if the format query fails, or -1 if no
+// matching format is among the (at most 128) formats examined.
+int get_32_bit_image_format( cl_context context, cl_mem_object_type objType, cl_mem_flags flags, size_t channelCount, cl_image_format *outFormat )
+{
+    cl_image_format formatList[ 128 ];
+    const unsigned int maxFormats = (unsigned int)( sizeof( formatList ) / sizeof( formatList[ 0 ] ) );
+    unsigned int outFormatCount = 0, i;
+    int error;
+
+    /* Query the formats supported for this context / flags / object type */
+    error = clGetSupportedImageFormats( context, flags, objType, maxFormats, formatList, &outFormatCount );
+    if( error )
+        return error;
+
+    /* The reported count is the total number supported, which may exceed what fits in formatList */
+    if( outFormatCount > maxFormats )
+        outFormatCount = maxFormats;
+
+    /* Look for one that is a 32-bit format */
+    for( i = 0; i < outFormatCount; i++ )
+    {
+        const cl_channel_type dataType = formatList[ i ].image_channel_data_type;
+
+        if( dataType != CL_UNORM_INT_101010 && dataType != CL_FLOAT &&
+            dataType != CL_SIGNED_INT32 && dataType != CL_UNSIGNED_INT32 )
+            continue;
+
+        if( channelCount == 0 || get_format_channel_count( &formatList[ i ] ) == channelCount )
+        {
+            *outFormat = formatList[ i ];
+            return 0;
+        }
+    }
+
+    return -1;
+}
+
--- a/test_conformance/compatibility/test_common/harness/imageHelpers.h
+++ b/test_conformance/compatibility/test_common/harness/imageHelpers.h
@@ -0,0 +1,37 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef _imageHelpers_h
+#define _imageHelpers_h
+
+#include "errorHelpers.h"
+
+
+extern size_t get_format_type_size( const cl_image_format *format );
+extern size_t get_channel_data_type_size( cl_channel_type channelType );
+extern size_t get_format_channel_count( const cl_image_format *format );
+extern size_t get_channel_order_channel_count( cl_channel_order order );
+extern int    is_format_signed( const cl_image_format *format );
+extern size_t get_pixel_size( cl_image_format *format );
+
+/* Helper to get any ol image format as long as it is 8-bits-per-channel */
+extern int get_8_bit_image_format( cl_context context, cl_mem_object_type objType, cl_mem_flags flags, size_t channelCount, cl_image_format *outFormat );
+
+/* Helper to get any ol image format as long as it is 32-bits-per-channel */
+extern int get_32_bit_image_format( cl_context context, cl_mem_object_type objType, cl_mem_flags flags, size_t channelCount, cl_image_format *outFormat );
+
+
+#endif // _imageHelpers_h
+
--- a/test_conformance/compatibility/test_common/harness/kernelHelpers.c
+++ b/test_conformance/compatibility/test_common/harness/kernelHelpers.c
@@ -0,0 +1,684 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "kernelHelpers.h"
+#include "errorHelpers.h"
+#include "imageHelpers.h"
+
+#if defined(__MINGW32__)
+#include "mingw_compat.h"
+#endif
+
+/*
+ * Creates a program from source, builds it, and creates one kernel from it.
+ *
+ * context        - context the program is created in
+ * outProgram     - receives the program object; it is assigned as soon as the
+ *                  program is created and is not released here on later failures
+ * outKernel      - receives the kernel named kernelName
+ * numKernelLines - number of strings in kernelProgram
+ * kernelProgram  - source strings handed to clCreateProgramWithSource
+ * kernelName     - name of the kernel to create
+ *
+ * Returns 0 on success; otherwise the failing OpenCL error code, or -1 when
+ * the build did not succeed on a device, no devices/log were reported, or an
+ * allocation failed. On a build failure the source and the build log of the
+ * first failing device are written to the error log.
+ */
+int create_single_kernel_helper( cl_context context, cl_program *outProgram, cl_kernel *outKernel, unsigned int numKernelLines, const char **kernelProgram, const char *kernelName )
+{
+    int error = CL_SUCCESS;
+
+    /* Create the program object from source */
+    *outProgram = clCreateProgramWithSource( context, numKernelLines, kernelProgram, NULL, &error );
+    if( *outProgram == NULL || error != CL_SUCCESS)
+    {
+        print_error( error, "clCreateProgramWithSource failed" );
+        return error;
+    }
+
+    /* Compile the program */
+    int buildProgramFailed = 0;
+    int printedSource = 0;
+    error = clBuildProgram( *outProgram, 0, NULL, NULL, NULL, NULL );
+    if (error != CL_SUCCESS)
+    {
+        unsigned int i;
+        print_error(error, "clBuildProgram failed");
+        buildProgramFailed = 1;
+        printedSource = 1;
+        log_error( "Original source is: ------------\n" );
+        for( i = 0; i < numKernelLines; i++ )
+            log_error( "%s", kernelProgram[ i ] );
+    }
+
+    // Verify the build status on all devices
+    cl_uint deviceCount = 0;
+    error = clGetProgramInfo( *outProgram, CL_PROGRAM_NUM_DEVICES, sizeof( deviceCount ), &deviceCount, NULL );
+    if (error != CL_SUCCESS) {
+        print_error(error, "clGetProgramInfo CL_PROGRAM_NUM_DEVICES failed");
+        return error;
+    }
+
+    if (deviceCount == 0) {
+        log_error("No devices found for program.\n");
+        return -1;
+    }
+
+    cl_device_id    *devices = (cl_device_id*) malloc( deviceCount * sizeof( cl_device_id ) );
+    if( NULL == devices )
+        return -1;
+    memset( devices, 0, deviceCount * sizeof( cl_device_id ));
+    error = clGetProgramInfo( *outProgram, CL_PROGRAM_DEVICES, sizeof( cl_device_id ) * deviceCount, devices, NULL );
+    if (error != CL_SUCCESS) {
+        print_error(error, "clGetProgramInfo CL_PROGRAM_DEVICES failed");
+        free( devices );
+        return error;
+    }
+
+    cl_uint z;
+    for( z = 0; z < deviceCount; z++ )
+    {
+        /* A missing device name is reported but is not fatal */
+        char deviceName[4096] = "";
+        error = clGetDeviceInfo(devices[z], CL_DEVICE_NAME, sizeof( deviceName), deviceName, NULL);
+        if (error != CL_SUCCESS || deviceName[0] == '\0') {
+            log_error("Device \"%u\" failed to return a name\n", z);
+            print_error(error, "clGetDeviceInfo CL_DEVICE_NAME failed");
+        }
+
+        cl_build_status buildStatus;
+        error = clGetProgramBuildInfo(*outProgram, devices[z], CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, NULL);
+        if (error != CL_SUCCESS) {
+            print_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_STATUS failed");
+            free( devices );
+            return error;
+        }
+
+        if (buildStatus != CL_BUILD_SUCCESS || buildProgramFailed) {
+            char *buildLog = NULL;
+            size_t logSize = 0;
+            if (buildStatus == CL_BUILD_SUCCESS && buildProgramFailed) log_error("clBuildProgram returned an error, but buildStatus is marked as CL_BUILD_SUCCESS.\n");
+
+            char statusString[64] = "";
+            if (buildStatus == (cl_build_status)CL_BUILD_SUCCESS)
+                sprintf(statusString, "CL_BUILD_SUCCESS");
+            else if (buildStatus == (cl_build_status)CL_BUILD_NONE)
+                sprintf(statusString, "CL_BUILD_NONE");
+            else if (buildStatus == (cl_build_status)CL_BUILD_ERROR)
+                sprintf(statusString, "CL_BUILD_ERROR");
+            else if (buildStatus == (cl_build_status)CL_BUILD_IN_PROGRESS)
+                sprintf(statusString, "CL_BUILD_IN_PROGRESS");
+            else
+                sprintf(statusString, "UNKNOWN (%d)", buildStatus);
+
+            if (buildStatus != CL_BUILD_SUCCESS) log_error("Build not successful for device \"%s\", status: %s\n", deviceName, statusString);
+
+            /* Query the log size first so that a long log is not rejected
+               (and therefore lost) because a fixed-size buffer is too small. */
+            error = clGetProgramBuildInfo( *outProgram, devices[z], CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize );
+            if (error == CL_SUCCESS && logSize > 0) {
+                buildLog = (char*) malloc( logSize + 1 );
+                if (NULL == buildLog) {
+                    log_error("Unable to allocate %lu bytes for the build log\n", (unsigned long)( logSize + 1 ));
+                    free( devices );
+                    return -1;
+                }
+                buildLog[0] = '\0';
+                error = clGetProgramBuildInfo( *outProgram, devices[z], CL_PROGRAM_BUILD_LOG, logSize, buildLog, NULL );
+                buildLog[logSize] = '\0';
+            }
+            if (error != CL_SUCCESS || buildLog == NULL || buildLog[0]=='\0'){
+                log_error("Device %u (%s) failed to return a build log\n", z, deviceName);
+                free( buildLog );
+                free( devices );
+                if (error) {
+                    print_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed");
+                    return error;
+                }
+                log_error("clGetProgramBuildInfo returned an empty log.\n");
+                return -1;
+            }
+            // The source was already printed if clBuildProgram itself failed.
+            if (!printedSource)
+            {
+                unsigned int i;
+                log_error( "Original source is: ------------\n" );
+                for( i = 0; i < numKernelLines; i++ )
+                    log_error( "%s", kernelProgram[ i ] );
+                printedSource = 1;
+            }
+            log_error( "Build log for device \"%s\" is: ------------\n", deviceName );
+            log_error( "%s\n", buildLog );
+            log_error( "\n----------\n" );
+            free( buildLog );
+            free( devices );
+            return -1;
+        }
+    }
+
+    /* And create a kernel from it */
+    *outKernel = clCreateKernel( *outProgram, kernelName, &error );
+    if( *outKernel == NULL || error != CL_SUCCESS)
+    {
+        print_error( error, "Unable to create kernel" );
+        free( devices );
+        return error;
+    }
+
+    free( devices );
+    return 0;
+}
+
+/*
+ * Parses the major and minor version numbers out of a device's
+ * CL_DEVICE_VERSION string, which should fit the regex
+ * "OpenCL [0-9]+\.[0-9]+ *.*".
+ *
+ * major, minor - receive the parsed numbers
+ *
+ * Returns 0 on success. A failed query or a malformed string is reported
+ * through test_error, in the same way the decimal-point check reports it.
+ */
+int get_device_version( cl_device_id id, size_t* major, size_t* minor)
+{
+    static const char prefix[] = "OpenCL ";
+    cl_char buffer[ 4098 ];
+    char *majorStart, *majorEnd, *minorStart, *minorEnd;
+
+    cl_int error = clGetDeviceInfo( id, CL_DEVICE_VERSION, sizeof( buffer ), buffer, NULL );
+    test_error( error, "Unable to get device version string" );
+    buffer[ sizeof( buffer ) - 1 ] = 0;
+
+    /* Without this check a short or unexpected string would make the prefix
+       skip below step past the terminator into uninitialized memory. */
+    error = ( strncmp( (const char *)buffer, prefix, strlen( prefix ) ) != 0 );
+    test_error( error, "ERROR: Version string must start with \"OpenCL \"!" );
+
+    majorStart = (char *)buffer + strlen( prefix );
+    while( *majorStart == ' ' )
+        majorStart++;
+    *major = strtol( majorStart, &majorEnd, 10 );
+    /* majorEnd == majorStart means strtol consumed no digits at all */
+    error = ( majorEnd == majorStart ) || ( *majorEnd != '.' );
+    test_error(error, "ERROR: Version number must contain a decimal point!");
+
+    minorStart = majorEnd + 1;
+    *minor = strtol( minorStart, &minorEnd, 10 );
+    error = ( minorEnd == minorStart );
+    test_error( error, "ERROR: Version number must contain a minor version!" );
+    return error;
+}
+
+/*
+ * Computes the largest work-group size usable by the kernel on every device
+ * of the context, plus the per-dimension work-item limits common to them.
+ *
+ * outMaxSize - receives the smallest of each device's
+ *              CL_DEVICE_MAX_WORK_GROUP_SIZE and CL_KERNEL_WORK_GROUP_SIZE
+ * outLimits  - optional (may be NULL); receives three entries holding the
+ *              smallest CL_DEVICE_MAX_WORK_ITEM_SIZES across the devices
+ *
+ * Returns 0 on success, the failing OpenCL error code, or -1 when an
+ * allocation fails.
+ */
+int get_max_allowed_work_group_size( cl_context context, cl_kernel kernel, size_t *outMaxSize, size_t *outLimits )
+{
+    cl_device_id *devices = NULL;
+    size_t *itemSizes = NULL;
+    size_t size, maxCommonSize = 0;
+    size_t outSize = 0;
+    size_t itemSizesBytes;
+    size_t sizeLimit[ 3 ];
+    cl_uint numDims, usedDims, d;
+    int numDevices, i;
+    int error;
+
+    error = clGetContextInfo( context, CL_CONTEXT_DEVICES, 0, NULL, &outSize );
+    test_error( error, "Unable to obtain list of devices size for context" );
+
+    devices = (cl_device_id *)malloc( outSize );
+    if( NULL == devices )
+    {
+        log_error( "Unable to allocate the device list for the context\n" );
+        return -1;
+    }
+
+    error = clGetContextInfo( context, CL_CONTEXT_DEVICES, outSize, devices, NULL );
+    if( error != CL_SUCCESS )
+    {
+        print_error( error, "Unable to obtain list of devices for context" );
+        goto cleanup;
+    }
+
+    numDevices = (int)( outSize / sizeof( cl_device_id ) );
+
+    for( i = 0; i < numDevices; i++ )
+    {
+        error = clGetDeviceInfo( devices[i], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof( size ), &size, NULL );
+        if( error != CL_SUCCESS )
+        {
+            print_error( error, "Unable to obtain max work group size for device" );
+            goto cleanup;
+        }
+        if( size < maxCommonSize || maxCommonSize == 0)
+            maxCommonSize = size;
+
+        error = clGetKernelWorkGroupInfo( kernel, devices[i], CL_KERNEL_WORK_GROUP_SIZE, sizeof( size ), &size, NULL );
+        if( error != CL_SUCCESS )
+        {
+            print_error( error, "Unable to obtain max work group size for device and kernel combo" );
+            goto cleanup;
+        }
+        if( size < maxCommonSize  || maxCommonSize == 0)
+            maxCommonSize = size;
+
+        error = clGetDeviceInfo( devices[i], CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof( numDims ), &numDims, NULL);
+        if( error != CL_SUCCESS )
+        {
+            print_error( error, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS" );
+            goto cleanup;
+        }
+
+        /* The device may report more than three dimensions, so the sizes are
+           read into a buffer of the reported length rather than straight into
+           the three-entry sizeLimit array. */
+        itemSizesBytes = ( numDims ? numDims : 1 ) * sizeof( size_t );
+        itemSizes = (size_t *)malloc( itemSizesBytes );
+        if( NULL == itemSizes )
+        {
+            log_error( "Unable to allocate the work item size list\n" );
+            error = -1;
+            goto cleanup;
+        }
+        error = clGetDeviceInfo( devices[i], CL_DEVICE_MAX_WORK_ITEM_SIZES, itemSizesBytes, itemSizes, NULL);
+        if( error != CL_SUCCESS )
+        {
+            print_error( error, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES" );
+            goto cleanup;
+        }
+
+        /* Only the first three dimensions are reported to the caller;
+           dimensions the device does not have keep a limit of 1. */
+        usedDims = ( numDims < 3 ) ? numDims : 3;
+        sizeLimit[0] = sizeLimit[1] = sizeLimit[2] = 1;
+        for( d = 0; d < usedDims; d++ )
+            sizeLimit[d] = itemSizes[d];
+        free( itemSizes );
+        itemSizes = NULL;
+
+        if (outLimits != NULL)
+        {
+            if (i == 0) {
+                for (d = 0; d < 3; d++)
+                    outLimits[d] = sizeLimit[d];
+            } else {
+                for (d = 0; d < usedDims; d++) {
+                    if (sizeLimit[d] < outLimits[d])
+                        outLimits[d] = sizeLimit[d];
+                }
+            }
+        }
+    }
+
+    *outMaxSize = maxCommonSize;
+    error = 0;
+
+cleanup:
+    free( itemSizes );
+    free( devices );
+    return error;
+}
+
+
+/*
+ * Picks a 1D work-group size for globalThreadSize: starting from the largest
+ * size allowed for the kernel on the context's devices, it counts down until
+ * the size divides globalThreadSize evenly and does not exceed the first
+ * work-item size limit. The result is stored in *outMaxSize.
+ *
+ * Returns 0 on success, or the error from get_max_allowed_work_group_size.
+ */
+int get_max_common_work_group_size( cl_context context, cl_kernel kernel,
+                                   size_t globalThreadSize, size_t *outMaxSize )
+{
+    size_t perDimLimit[3];
+    int status = get_max_allowed_work_group_size( context, kernel, outMaxSize, perDimLimit );
+
+    if( status != 0 )
+        return status;
+
+    /* No explicit lower bound is needed: a size of 1 always divides evenly. */
+    while( ( globalThreadSize % *outMaxSize ) != 0 || ( *outMaxSize > perDimLimit[0] ) )
+    {
+        --(*outMaxSize);
+    }
+
+    return 0;
+}
+
+/*
+ * Shared implementation of the 2D and 3D work-group size helpers.
+ *
+ * dims is the number of dimensions (2 or 3). If the whole global range fits
+ * in one work-group (product within the allowed work-group size and every
+ * dimension within its work-item limit) the global sizes are returned as-is.
+ * Otherwise each dimension in turn gets the largest divisor of its global
+ * size that fits both its work-item limit and the work-group budget left
+ * over by the dimensions chosen before it.
+ */
+static int fit_common_work_group_sizes( cl_context context, cl_kernel kernel, unsigned int dims,
+                                        const size_t *globalThreadSizes, size_t *outMaxSizes )
+{
+    size_t sizeLimit[3];
+    size_t maxSize;
+    size_t product = 1;
+    size_t remainingSize, sizeForThisOne;
+    unsigned int i, j;
+    int withinLimits = 1;
+    int error = get_max_allowed_work_group_size( context, kernel, &maxSize, sizeLimit );
+    if( error != 0 )
+        return error;
+
+    /* Simple case */
+    for( i = 0; i < dims; i++ )
+    {
+        product *= globalThreadSizes[ i ];
+        if( globalThreadSizes[ i ] > sizeLimit[ i ] )
+            withinLimits = 0;
+    }
+    if( product <= maxSize && withinLimits )
+    {
+        for( i = 0; i < dims; i++ )
+            outMaxSizes[ i ] = globalThreadSizes[ i ];
+        return 0;
+    }
+
+    /* Find a set of factors, multiplied together less than maxSize, but each
+       a factor of the corresponding global size */
+    remainingSize = maxSize;
+    for( i = 0; i < dims; i++ )
+    {
+        if( globalThreadSizes[ i ] > remainingSize )
+            sizeForThisOne = remainingSize;
+        else
+            sizeForThisOne = globalThreadSizes[ i ];
+        for( ; ( globalThreadSizes[ i ] % sizeForThisOne ) != 0 || ( sizeForThisOne > sizeLimit[ i ] ); sizeForThisOne-- )
+            ;
+        outMaxSizes[ i ] = sizeForThisOne;
+
+        /* Budget left for the next dimension */
+        remainingSize = maxSize;
+        for( j = 0; j <= i; j++ )
+            remainingSize /= outMaxSizes[ j ];
+    }
+
+    return 0;
+}
+
+/*
+ * Picks a 2D work-group size for the two global sizes in globalThreadSizes
+ * and stores the two results in outMaxSizes.
+ * Returns 0 on success, or the error from get_max_allowed_work_group_size.
+ */
+int get_max_common_2D_work_group_size( cl_context context, cl_kernel kernel,
+                                   size_t *globalThreadSizes, size_t *outMaxSizes )
+{
+    return fit_common_work_group_sizes( context, kernel, 2, globalThreadSizes, outMaxSizes );
+}
+
+/*
+ * Picks a 3D work-group size for the three global sizes in globalThreadSizes
+ * and stores the three results in outMaxSizes.
+ * Returns 0 on success, or the error from get_max_allowed_work_group_size.
+ */
+int get_max_common_3D_work_group_size( cl_context context, cl_kernel kernel,
+                                      size_t *globalThreadSizes, size_t *outMaxSizes )
+{
+    return fit_common_work_group_sizes( context, kernel, 3, globalThreadSizes, outMaxSizes );
+}
+
+/* Helper to determine if an extension is supported by a device */
+/*
+ * Reports whether extensionName is listed in the device's
+ * CL_DEVICE_EXTENSIONS string.
+ *
+ * The name must match a whole space-separated entry, so asking for an
+ * extension whose name is a prefix of another one (for example
+ * "cl_khr_3d_image" versus "cl_khr_3d_image_writes") does not give a false
+ * positive.
+ *
+ * Returns 1 if the extension is listed, 0 if it is not or if the extension
+ * string could not be obtained (the failure is logged).
+ */
+int is_extension_available( cl_device_id device, const char *extensionName )
+{
+    char *extString;
+    const char *cursor;
+    size_t size = 0;
+    size_t nameLen;
+    int err;
+    int result = 0;
+
+    if( NULL == extensionName || '\0' == extensionName[ 0 ] )
+        return 0;
+
+    if(( err = clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, 0, NULL, &size) ))
+    {
+        log_error( "Error: failed to determine size of device extensions string at %s:%d (err = %d)\n", __FILE__, __LINE__, err );
+        return 0;
+    }
+
+    if( 0 == size )
+        return 0;
+
+    /* One extra byte so the string is terminated whatever the driver wrote */
+    extString = (char*) malloc( size + 1 );
+    if( NULL == extString )
+    {
+        log_error( "Error: unable to allocate %ld byte buffer for extension string at %s:%d (err = %d)\n", (long)size, __FILE__, __LINE__,  err );
+        return 0;
+    }
+
+    if(( err = clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, size, extString, NULL) ))
+    {
+        log_error( "Error: failed to obtain device extensions string at %s:%d (err = %d)\n", __FILE__, __LINE__, err );
+        free( extString );
+        return 0;
+    }
+    extString[ size ] = '\0';
+
+    /* Accept a hit only when it is delimited by spaces or the string ends */
+    nameLen = strlen( extensionName );
+    for( cursor = strstr( extString, extensionName ); cursor != NULL; cursor = strstr( cursor + 1, extensionName ) )
+    {
+        const int startsEntry = ( cursor == extString ) || ( cursor[ -1 ] == ' ' );
+        const int endsEntry = ( cursor[ nameLen ] == ' ' ) || ( cursor[ nameLen ] == '\0' );
+        if( startsEntry && endsEntry )
+        {
+            result = 1;
+            break;
+        }
+    }
+
+    free( extString );
+    return result;
+}
+
+/* Helper to determine if a device supports an image format */
+/*
+ * Reports whether the context supports the image format *fmt for the given
+ * memory flags and image type.
+ *
+ * Returns 1 when a supported format has the same channel order and channel
+ * data type as *fmt, 0 when none does or when the format list could not be
+ * obtained (failures are logged).
+ */
+int is_image_format_supported( cl_context context, cl_mem_flags flags, cl_mem_object_type image_type, const cl_image_format *fmt )
+{
+    cl_image_format *list;
+    cl_uint count = 0;
+    cl_uint i;
+    int found = 0;
+
+    /* First call only asks how many formats there are */
+    cl_int err = clGetSupportedImageFormats( context, flags, image_type, 0, NULL, &count );
+    if( err )
+    {
+        log_error( "Error: failed to obtain supported image type count at %s:%d (err = %d)\n", __FILE__, __LINE__, err );
+        return 0;
+    }
+    if( count == 0 )
+        return 0;
+
+    list = (cl_image_format*) malloc( count * sizeof( cl_image_format ) );
+    if( NULL == list )
+    {
+        log_error( "Error: unable to allocate %ld byte buffer for image format list at %s:%d (err = %d)\n", (long)( count * sizeof( cl_image_format ) ), __FILE__, __LINE__,  err );
+        return 0;
+    }
+
+    err = clGetSupportedImageFormats( context, flags, image_type, count, list, NULL );
+    if( err )
+    {
+        log_error( "Error: failed to obtain supported image type list at %s:%d (err = %d)\n", __FILE__, __LINE__, err );
+        free( list );
+        return 0;
+    }
+
+    // iterate looking for a match.
+    for( i = 0; i < count; i++ )
+    {
+        if( fmt->image_channel_data_type == list[ i ].image_channel_data_type &&
+            fmt->image_channel_order == list[ i ].image_channel_order )
+        {
+            found = 1;
+            break;
+        }
+    }
+
+    free( list );
+    return found;
+}
+
+size_t get_pixel_bytes( const cl_image_format *fmt );
+
+/*
+ * Returns the size in bytes of one pixel of the given image format.
+ *
+ * The channel count comes from the channel order; packed data types
+ * (565, 555, 101010) have a fixed pixel size regardless of it. An unknown
+ * channel order or data type is logged and aborts the process.
+ */
+size_t get_pixel_bytes( const cl_image_format *fmt )
+{
+    size_t channels;
+    size_t bytesPerChannel = 0;
+
+    /* Step 1: number of channels, from the channel order */
+    switch( fmt->image_channel_order )
+    {
+        case CL_R:
+        case CL_A:
+        case CL_Rx:
+        case CL_INTENSITY:
+        case CL_LUMINANCE:
+            channels = 1;
+            break;
+
+        case CL_RG:
+        case CL_RA:
+        case CL_RGx:
+            channels = 2;
+            break;
+
+        case CL_RGB:
+        case CL_RGBx:
+            channels = 3;
+            break;
+
+        case CL_RGBA:
+        case CL_ARGB:
+        case CL_BGRA:
+#ifdef CL_1RGB_APPLE
+        case CL_1RGB_APPLE:
+#endif
+#ifdef CL_BGR1_APPLE
+        case CL_BGR1_APPLE:
+#endif
+            channels = 4;
+            break;
+
+        default:
+            log_error("Unknown channel order at %s:%d!\n", __FILE__, __LINE__ );
+            abort();
+            break;
+    }
+
+    /* Step 2: bytes per channel, from the data type */
+    switch( fmt->image_channel_data_type )
+    {
+        /* Packed types: the whole pixel has a fixed size */
+        case CL_UNORM_SHORT_565:
+        case CL_UNORM_SHORT_555:
+            return 2;
+
+        case CL_UNORM_INT_101010:
+            return 4;
+
+        case CL_SNORM_INT8:
+        case CL_UNORM_INT8:
+        case CL_SIGNED_INT8:
+        case CL_UNSIGNED_INT8:
+            bytesPerChannel = 1;
+            break;
+
+        case CL_SNORM_INT16:
+        case CL_UNORM_INT16:
+        case CL_HALF_FLOAT:
+        case CL_SIGNED_INT16:
+        case CL_UNSIGNED_INT16:
+#ifdef CL_SFIXED14_APPLE
+        case CL_SFIXED14_APPLE:
+#endif
+            bytesPerChannel = 2;
+            break;
+
+        case CL_SIGNED_INT32:
+        case CL_UNSIGNED_INT32:
+        case CL_FLOAT:
+            bytesPerChannel = 4;
+            break;
+
+        default:
+            log_error("Unknown channel data type at %s:%d!\n", __FILE__, __LINE__ );
+            abort();
+    }
+
+    return channels * bytesPerChannel;
+}
+
+/*
+ * Like checkForImageSupport, but logs an error when that check reports a
+ * non-zero result. Returns 0 when the check passes, otherwise
+ * CL_IMAGE_FORMAT_NOT_SUPPORTED.
+ */
+int verifyImageSupport( cl_device_id device )
+{
+    if( 0 == checkForImageSupport( device ) )
+        return 0;
+
+    log_error( "ERROR: Device does not supported images as required by this test!\n" );
+    return CL_IMAGE_FORMAT_NOT_SUPPORTED;
+}
+
+/*
+ * Queries CL_DEVICE_IMAGE_SUPPORT for the device.
+ * Returns 0 when the device reports image support and
+ * CL_IMAGE_FORMAT_NOT_SUPPORTED when it reports none; a failed query is
+ * handled by test_error.
+ */
+int checkForImageSupport( cl_device_id device )
+{
+    cl_uint hasImages;
+    int status;
+
+    status = clGetDeviceInfo( device, CL_DEVICE_IMAGE_SUPPORT, sizeof( hasImages ), &hasImages, NULL );
+    test_error( status, "Unable to query device for image support" );
+
+    return ( hasImages != 0 ) ? 0 : CL_IMAGE_FORMAT_NOT_SUPPORTED;
+}
+
+/*
+ * Checks whether the device can be used for 3D image tests.
+ *
+ * Returns CL_IMAGE_FORMAT_NOT_SUPPORTED when the device reports no image
+ * support at all, or when it is an EMBEDDED_PROFILE device whose maximum 3D
+ * image width, height and depth are all zero. Returns 0 otherwise. Failed
+ * queries are handled by test_error.
+ */
+int checkFor3DImageSupport( cl_device_id device )
+{
+    cl_uint hasImages;
+    int status;
+    char profileName[128];
+
+    /* Images must be supported at all before 3D images can be */
+    status = clGetDeviceInfo( device, CL_DEVICE_IMAGE_SUPPORT, sizeof( hasImages ), &hasImages, NULL );
+    test_error( status, "Unable to query device for image support" );
+    if( hasImages == 0 )
+        return CL_IMAGE_FORMAT_NOT_SUPPORTED;
+
+    status = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof( profileName ), profileName, NULL );
+    test_error( status, "Unable to query device for CL_DEVICE_PROFILE" );
+
+    /* Only embedded-profile devices get the extra dimension check */
+    if( strcmp( profileName, "EMBEDDED_PROFILE" ) != 0 )
+        return 0;
+
+    size_t maxWidth = (size_t)-1;
+    size_t maxHeight = (size_t)-1;
+    size_t maxDepth = (size_t)-1;
+
+    status = clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL );
+    test_error( status, "Unable to get CL_DEVICE_IMAGE3D_MAX_WIDTH" );
+    status = clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL );
+    test_error( status, "Unable to get CL_DEVICE_IMAGE3D_MAX_HEIGHT" );
+    status = clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof( maxDepth ), &maxDepth, NULL );
+    test_error( status, "Unable to get CL_DEVICE_IMAGE3D_MAX_DEPTH" );
+
+    if( maxWidth == 0 && maxHeight == 0 && maxDepth == 0 )
+        return CL_IMAGE_FORMAT_NOT_SUPPORTED;
+
+    /* So our support is good */
+    return 0;
+}
+
+/*
+ * Allocates size bytes aligned to alignment, using the platform's aligned
+ * allocator. Memory obtained here must be released with align_free().
+ *
+ * On the posix_memalign path NULL is returned when the allocation fails.
+ */
+void * align_malloc(size_t size, size_t alignment)
+{
+#if defined(_WIN32) && defined(_MSC_VER)
+    return _aligned_malloc(size, alignment);
+#elif  defined(__linux__) || defined (linux) || defined(__APPLE__)
+    void * ptr = NULL;
+    /* posix_memalign rejects alignments that are not a multiple of
+       sizeof(void *); raise small alignments so they behave like they do
+       with the other platforms' allocators. */
+    if (alignment < sizeof(void *))
+        alignment = sizeof(void *);
+    if (0 == posix_memalign(&ptr, alignment, size))
+        return ptr;
+    return NULL;
+#elif defined(__MINGW32__)
+    return __mingw_aligned_malloc(size, alignment);
+#else
+    #error "Please add support OS for aligned malloc"
+#endif
+}
+
+/*
+ * Releases memory obtained from align_malloc(), using the deallocator that
+ * matches the platform's aligned allocator.
+ */
+void   align_free(void * ptr)
+{
+#if defined(_WIN32) && defined(_MSC_VER)
+    _aligned_free(ptr);
+#elif  defined(__linux__) || defined (linux) || defined(__APPLE__)
+    /* No "return <expr>;" here: C does not allow it in a void function */
+    free(ptr);
+#elif defined(__MINGW32__)
+    __mingw_aligned_free(ptr);
+#else
+    #error "Please add support OS for aligned free"
+#endif
+}
+
+/*
+ * Returns the largest CL_DEVICE_MEM_BASE_ADDR_ALIGN, converted from bits to
+ * bytes, among the devices of the context.
+ *
+ * The result is cached in a static variable after the first non-zero answer,
+ * so later calls return the cached value whatever context is passed.
+ * Returns 0 when the device list cannot be obtained; devices whose alignment
+ * query fails are logged and skipped.
+ */
+size_t get_min_alignment(cl_context context)
+{
+    static cl_uint align_size = 0;
+
+    if( 0 == align_size )
+    {
+        cl_device_id * devices;
+        size_t devices_size = 0;
+        size_t device_count;
+        size_t i;
+        cl_uint result = 0;
+        cl_int error;
+
+        error = clGetContextInfo (context,
+                                  CL_CONTEXT_DEVICES,
+                                  0,
+                                  NULL,
+                                  &devices_size);
+        test_error_ret(error, "clGetContextInfo failed", 0);
+
+        devices = (cl_device_id*)malloc(devices_size);
+        if (devices == NULL) {
+            /* Not an OpenCL failure, so there is no error code to report */
+            log_error( "malloc failed\n" );
+            return 0;
+        }
+
+        error = clGetContextInfo (context,
+                                  CL_CONTEXT_DEVICES,
+                                  devices_size,
+                                  (void*)devices,
+                                  NULL);
+        if (error != CL_SUCCESS) {
+            print_error( error, "clGetContextInfo failed" );
+            free(devices);   /* do not leak the list on this early exit */
+            return 0;
+        }
+
+        device_count = devices_size / sizeof(cl_device_id);
+        for (i = 0; i < device_count; i++)
+        {
+            cl_uint alignment = 0;
+
+            error = clGetDeviceInfo (devices[i],
+                                     CL_DEVICE_MEM_BASE_ADDR_ALIGN,
+                                     sizeof(cl_uint),
+                                     (void*)&alignment,
+                                     NULL);
+
+            if (error == CL_SUCCESS)
+            {
+                alignment >>= 3;    // convert bits to bytes
+                result = (alignment > result) ? alignment : result;
+            }
+            else
+                print_error( error, "clGetDeviceInfo failed" );
+        }
+
+        align_size = result;
+        free(devices);
+    }
+
+    return align_size;
+}
+
+/*
+ * Determines the default single-precision rounding mode of the device from
+ * CL_DEVICE_SINGLE_FP_CONFIG.
+ *
+ * Returns CL_FP_ROUND_TO_NEAREST when the device supports it. Otherwise
+ * returns CL_FP_ROUND_TO_ZERO, but only for an EMBEDDED_PROFILE device that
+ * supports round-to-zero. Every other outcome is reported through
+ * test_error_ret with a return value of 0.
+ */
+cl_device_fp_config get_default_rounding_mode( cl_device_id device )
+{
+    char profileStr[128] = "";
+    cl_device_fp_config single = 0;
+    int error = clGetDeviceInfo( device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( single ), &single, NULL );
+    if( error )
+        test_error_ret( error, "Unable to get device CL_DEVICE_SINGLE_FP_CONFIG", 0 );
+
+    if( single & CL_FP_ROUND_TO_NEAREST )
+        return CL_FP_ROUND_TO_NEAREST;
+
+    if( 0 == (single & CL_FP_ROUND_TO_ZERO) )
+        test_error_ret( -1, "FAILURE: device must support either CL_FP_ROUND_TO_ZERO or CL_FP_ROUND_TO_NEAREST", 0 );
+
+    // Make sure we are an embedded device before allowing a pass
+    if( (error = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof( profileStr ), &profileStr, NULL ) ))
+        test_error_ret( error, "FAILURE: Unable to get CL_DEVICE_PROFILE", 0 );
+
+    // error is 0 here because the query above succeeded, so an explicit
+    // failure code is passed, as in the round-to-zero check above.
+    if( strcmp( profileStr, "EMBEDDED_PROFILE" ) )
+        test_error_ret( -1, "FAILURE: non-EMBEDDED_PROFILE devices must support CL_FP_ROUND_TO_NEAREST", 0 );
+
+    return CL_FP_ROUND_TO_ZERO;
+}
+
+/*
+ * Tests whether any bit of prop is set in the device's
+ * CL_DEVICE_QUEUE_PROPERTIES. Returns 1 if so and 0 if not; a failed query
+ * is reported through test_error_ret with a return value of 0.
+ */
+int checkDeviceForQueueSupport( cl_device_id device, cl_command_queue_properties prop )
+{
+    cl_command_queue_properties supported;
+    cl_int status;
+
+    status = clGetDeviceInfo( device, CL_DEVICE_QUEUE_PROPERTIES, sizeof( supported ), &supported, NULL );
+    test_error_ret( status, "FAILURE: Unable to get device queue properties", 0 );
+
+    if( ( supported & prop ) != 0 )
+        return 1;
+    return 0;
+}
+
+/*
+ * Logs a one-line header with the device's name, vendor, version and OpenCL C
+ * version. Returns CL_SUCCESS once the header has been logged; failed queries
+ * are handled by test_error.
+ */
+int printDeviceHeader( cl_device_id device )
+{
+    char name[ 512 ];
+    char vendor[ 512 ];
+    char version[ 512 ];
+    char clcVersion[ 512 ];
+    const char *clcLabel;
+    const char *clcValue;
+    int error;
+
+    error = clGetDeviceInfo( device, CL_DEVICE_NAME, sizeof( name ), name, NULL );
+    test_error( error, "Unable to get CL_DEVICE_NAME for device" );
+
+    error = clGetDeviceInfo( device, CL_DEVICE_VENDOR, sizeof( vendor ), vendor, NULL );
+    test_error( error, "Unable to get CL_DEVICE_VENDOR for device" );
+
+    error = clGetDeviceInfo( device, CL_DEVICE_VERSION, sizeof( version ), version, NULL );
+    test_error( error, "Unable to get CL_DEVICE_VERSION for device" );
+
+    error = clGetDeviceInfo( device, CL_DEVICE_OPENCL_C_VERSION, sizeof( clcVersion ), clcVersion, NULL );
+    test_error( error, "Unable to get CL_DEVICE_OPENCL_C_VERSION for device" );
+
+    /* The C version part is appended only when its query succeeded */
+    if( error == CL_SUCCESS )
+    {
+        clcLabel = ", CL C Version = ";
+        clcValue = clcVersion;
+    }
+    else
+    {
+        clcLabel = "";
+        clcValue = "";
+    }
+
+    log_info("Compute Device Name = %s, Compute Device Vendor = %s, Compute Device Version = %s%s%s\n",
+             name, vendor, version, clcLabel, clcValue );
+
+    return CL_SUCCESS;
+}
--- a/test_conformance/compatibility/test_common/harness/kernelHelpers.h
+++ b/test_conformance/compatibility/test_common/harness/kernelHelpers.h
@@ -0,0 +1,128 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef _kernelHelpers_h
+#define _kernelHelpers_h
+
+#include "compat.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#if defined (__MINGW32__)
+#include <malloc.h>
+#endif
+
+#include <string.h>
+
+#ifdef __APPLE__
+    #include <OpenCL/opencl.h>
+#else
+    #include <CL/opencl.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+/*
+ *  The below code is intended to be used at the top of kernels that appear inline in files to set line and file info for the kernel:
+ *
+ *  const char *source = {
+ *      INIT_OPENCL_DEBUG_INFO
+ *      "__kernel void foo( int x )\n"
+ *      "{\n"
+ *      "   ...\n"
+ *      "}\n"
+ *  };
+ */
+/* Expands to a "#line <line> <file>\n" string literal for the place where the macro is used. */
+#define INIT_OPENCL_DEBUG_INFO                      SET_OPENCL_LINE_INFO( __LINE__, __FILE__ )
+/* Builds the "#line" string literal from an explicit line number and file name. */
+#define SET_OPENCL_LINE_INFO(_line, _file)          "#line " STRINGIFY(_line) " " STRINGIFY(_file) "\n"
+/* Stringifies _x after macro expansion; only defined here if not already defined. */
+#ifndef STRINGIFY_VALUE
+    #define STRINGIFY_VALUE(_x)                     STRINGIFY(_x)
+#endif
+/* Turns its argument into a string literal; only defined here if not already defined. */
+#ifndef STRINGIFY
+    #define STRINGIFY(_x)                           #_x
+#endif
+
+/* Helper that creates a single program and kernel from a single-kernel program source */
+extern int create_single_kernel_helper( cl_context context, cl_program *outProgram, cl_kernel *outKernel, unsigned int numKernelLines, const char **kernelProgram, const char *kernelName );
+
+/* Helper to obtain the biggest fit work group size for all the devices in a given group and for the given global thread size */
+extern int get_max_common_work_group_size( cl_context context, cl_kernel kernel, size_t globalThreadSize, size_t *outSize );
+
+/* 2D variant of the helper above: globalThreadSize holds two global sizes and outSizes receives two work group sizes */
+extern int get_max_common_2D_work_group_size( cl_context context, cl_kernel kernel, size_t *globalThreadSize, size_t *outSizes );
+
+/* 3D variant of the helper above: globalThreadSize holds three global sizes and outSizes receives three work group sizes */
+extern int get_max_common_3D_work_group_size( cl_context context, cl_kernel kernel, size_t *globalThreadSize, size_t *outSizes );
+
+/* Helper to get major/minor number for a device */
+extern int get_device_version( cl_device_id id, size_t* major, size_t* minor);
+
+/* Helper to obtain the biggest allowed work group size for all the devices in a given group */
+extern int get_max_allowed_work_group_size( cl_context context, cl_kernel kernel, size_t *outSize, size_t *outLimits );
+
+/* Helper to determine if an extension is supported by a device */
+extern int is_extension_available( cl_device_id device, const char *extensionName );
+
+/* Helper to determine if a device supports an image format */
+extern int is_image_format_supported( cl_context context, cl_mem_flags flags, cl_mem_object_type image_type, const cl_image_format *fmt );
+
+/* Helper to get pixel size for a pixel format */
+size_t get_pixel_bytes( const cl_image_format *fmt );
+
+/* Verify the given device supports images. 0 means you're good to go, otherwise an error */
+extern int verifyImageSupport( cl_device_id device );
+
+/* Checks that the given device supports images. Same as verify, but doesn't print an error */
+extern int checkForImageSupport( cl_device_id device );
+extern int checkFor3DImageSupport( cl_device_id device );
+
+/* Checks that a given queue property is supported on the specified device. Returns 1 if supported, 0 if not or an error. */
+extern int checkDeviceForQueueSupport( cl_device_id device, cl_command_queue_properties prop );
+
+/* Helper for aligned memory allocation */
+void * align_malloc(size_t size, size_t alignment);
+void   align_free(void *);
+
+/* Helper to obtain the min alignment for a given context, i.e the max of all min alignments for devices attached to the context*/
+size_t get_min_alignment(cl_context context);
+
+/* Helper to obtain the default rounding mode for single precision computation. (Double is always CL_FP_ROUND_TO_NEAREST.) Returns 0 on error. */
+cl_device_fp_config get_default_rounding_mode( cl_device_id device );
+
+/*
+ * Skips the calling test when checkForImageSupport( device ) returns
+ * non-zero: logs a note and executes "return 0" in the enclosing function.
+ * The expansion is a bare if statement (not wrapped in do/while), so take
+ * care when using it directly before an "else".
+ */
+#define PASSIVE_REQUIRE_IMAGE_SUPPORT( device )    \
+    if( checkForImageSupport( device ) )    \
+    {    \
+        log_info( "\n\tNote: device does not support images. Skipping test...\n" );    \
+        return 0;    \
+    }
+
+/*
+ * Skips the calling test when checkFor3DImageSupport( device ) returns
+ * non-zero: logs a note and executes "return 0" in the enclosing function.
+ * The expansion is a bare if statement (not wrapped in do/while), so take
+ * care when using it directly before an "else".
+ */
+#define PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( device )    \
+    if( checkFor3DImageSupport( device ) )    \
+    {    \
+        log_info( "\n\tNote: device does not support 3D images. Skipping test...\n" );    \
+        return 0;    \
+    }
+
+/* Prints out the standard device header for all tests given the device to print for */
+extern int printDeviceHeader( cl_device_id device );
+
+#ifdef __cplusplus
+}
+#endif // __cplusplus
+
+#endif // _kernelHelpers_h
--- a/test_conformance/compatibility/test_common/harness/mingw_compat.c
+++ b/test_conformance/compatibility/test_common/harness/mingw_compat.c
@@ -0,0 +1,59 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#if defined(__MINGW32__)
+
+#include "mingw_compat.h"
+#include <stdio.h>
+#include <string.h>
+
+// basename() is unavailable on various MinGW toolchains (especially the
+// 64-bit ones), so a minimal replacement is implemented here.
+//
+// Behaviour:
+//   * NULL or empty path           -> "."
+//   * path without a drive prefix  -> returned unchanged (only paths of the
+//                                     form "X:..." are decomposed)
+//   * otherwise trailing '\\' / '/' characters are removed IN PLACE and a
+//     pointer to the component after the last remaining separator is
+//     returned; if no separator remains the whole path is returned.
+//
+// The result aliases 'path' (or the static "." string) and must not be freed.
+const char *basename_dot=".";
+char*
+basename(char *path)
+{
+    char *p, *b;
+    size_t len;
+
+    // Validate before touching the string: strlen(NULL) and path[1] of an
+    // empty string are both out-of-bounds accesses.
+    if (path == NULL || path[0] == '\0') {
+        return (char*)basename_dot;
+    }
+
+    // Not absolute path on windows
+    if (path[1] != ':') {
+        return path;
+    }
+
+    len = strlen(path);    // at least 2 because of the "X:" prefix
+
+    // Trim trailing path separators, but never eat into the drive prefix
+    while (len > 2 &&
+           (path[len - 1] == '\\' || path[len - 1] == '/')) {
+        len--;
+        path[len] = '\0';
+    }
+
+    // Remember the position just past the last separator seen
+    b = path;
+    for (p = path; *p != '\0'; p++) {
+        if (*p == '\\' || *p == '/') {
+            b = p + 1;
+        }
+    }
+
+    return b;
+}
+
+#endif
--- a/test_conformance/compatibility/test_common/harness/mingw_compat.h
+++ b/test_conformance/compatibility/test_common/harness/mingw_compat.h
@@ -0,0 +1,31 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef MINGW_COMPAT_H
+#define MINGW_COMPAT_H
+
+#if defined(__MINGW32__)
+char *basename(char *path);
+#include <malloc.h>
+
+#if defined(__MINGW64__)
+//mingw-w64 does not have __mingw_aligned_malloc; it provides _aligned_malloc instead
+#define __mingw_aligned_malloc _aligned_malloc
+#define __mingw_aligned_free _aligned_free
+#include <stddef.h>
+#endif //(__MINGW64__)
+
+#endif //(__MINGW32__)
+#endif // MINGW_COMPAT_H
--- a/test_conformance/compatibility/test_common/harness/msvc9.c
+++ b/test_conformance/compatibility/test_common/harness/msvc9.c
@@ -0,0 +1,749 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#if defined(_WIN32) && defined (_MSC_VER)
+
+#include "compat.h"
+#include <math.h>
+#include <float.h>
+#include <assert.h>
+#include <CL/cl_platform.h>
+
+
+
+///////////////////////////////////////////////////////////////////
+//
+//        copysignf, copysign, copysignl, rintf, rint, rintl
+//
+///////////////////////////////////////////////////////////////////
+
+/* Returns a float with the magnitude of x and the sign of y, computed
+   directly on the 32-bit patterns of the two arguments. */
+float copysignf( float x, float y )
+{
+    const cl_uint signMask = 0x80000000U;
+    union{ cl_uint u; float f; } mag, sgn;
+
+    mag.f = x;
+    sgn.f = y;
+
+    /* clear x's sign bit, then splice in y's */
+    mag.u &= ~signMask;
+    mag.u |= sgn.u & signMask;
+
+    return mag.f;
+}
+
+/* Returns a double with the magnitude of x and the sign of y, computed
+   directly on the 64-bit patterns of the two arguments. */
+double copysign( double x, double y )
+{
+    const cl_ulong signMask = 0x8000000000000000ULL;
+    union{ cl_ulong u; double f; } mag, sgn;
+
+    mag.f = x;
+    sgn.f = y;
+
+    /* clear x's sign bit, then splice in y's */
+    mag.u &= ~signMask;
+    mag.u |= sgn.u & signMask;
+
+    return mag.f;
+}
+
+long double copysignl( long double x, long double y )
+{
+    union
+    {
+        long double f;
+        struct{ cl_ulong m; cl_ushort sexp; }u;
+    }ux, uy;
+
+    ux.f = x;
+    uy.f = y;
+
+    ux.u.sexp = (ux.u.sexp & 0x7fff) | (uy.u.sexp & 0x8000);
+
+    return ux.f;
+}
+
+/* Rounds x to an integral value by adding and then subtracting 2^23 (with
+   the sign of x); the float addition performs the rounding. Values with
+   |x| >= 2^23, infinities and NaN are returned unchanged. */
+float rintf(float x)
+{
+    const float two23 = 8388608.0f; /* 0x1.0p23f */
+    float shift;
+    float sum;
+
+    if( !( fabsf(x) < two23 ) )
+        return x;
+
+    shift = copysignf( two23, x );
+    sum = x + shift;    /* kept in a float object on purpose */
+    sum -= shift;
+
+    /* restore the sign so that e.g. -0.25f yields -0.0f */
+    return copysignf( sum, x );
+}
+
+/* Rounds x to an integral value by adding and then subtracting 2^52 (with
+   the sign of x); the double addition performs the rounding. Values with
+   |x| >= 2^52, infinities and NaN are returned unchanged. */
+double rint(double x)
+{
+    const double two52 = 4503599627370496.0; /* 0x1.0p52 */
+    double shift;
+    double sum;
+
+    if( !( fabs(x) < two52 ) )
+        return x;
+
+    shift = copysign( two52, x );
+    sum = x + shift;    /* kept in a double object on purpose */
+    sum -= shift;
+
+    /* restore the sign so that e.g. -0.25 yields -0.0 */
+    return copysign( sum, x );
+}
+
+long double rintl(long double x)
+{
+    double absx = fabs(x);
+
+    if( absx < 9223372036854775808.0L /* 0x1.0p64f */ )
+    {
+        long double magic = copysignl( 9223372036854775808.0L /* 0x1.0p63L */, x );
+        long double rounded = x + magic;
+        rounded -= magic;
+        x = copysignl( rounded, x );
+    }
+
+    return x;
+}
+
+
+///////////////////////////////////////////////////////////////////
+//
+//                   ilogb, ilogbf, ilogbl
+//
+///////////////////////////////////////////////////////////////////
+#ifndef FP_ILOGB0
+    #define FP_ILOGB0   INT_MIN
+#endif
+
+#ifndef FP_ILOGBNAN
+    #define FP_ILOGBNAN INT_MIN
+#endif
+
+/* Returns the unbiased binary exponent of x.
+     x == 0       -> FP_ILOGB0
+     x == +/-inf  -> INT_MAX
+     x is NaN     -> FP_ILOGBNAN
+   Subnormal inputs are normalised first, so their true exponent is
+   reported. */
+int ilogb (double x)
+{
+    union{ double f; cl_ulong u;} u;
+    cl_ulong absx;
+
+    u.f = x;
+    absx = u.u & CL_LONG_MAX;   /* drop the sign bit */
+
+    /* Unsigned wrap-around range test: true for every pattern that is NOT a
+       normal number, i.e. below the smallest normal (0x0010000000000000)
+       or at/above infinity (0x7ff0000000000000). */
+    if( absx - 0x0010000000000000ULL >= 0x7ff0000000000000ULL - 0x0010000000000000ULL)
+    {
+        if( 0 == absx )
+            return FP_ILOGB0;
+
+        if( 0x7ff0000000000000ULL == absx )
+            return INT_MAX;
+
+        if( absx > 0x7ff0000000000000ULL )
+            return FP_ILOGBNAN;
+
+        // subnormal: overlay the fraction on 1.0, subtract 1.0 so the FPU
+        // normalises it, then read back the exponent of the difference.
+        u.u = absx | 0x3ff0000000000000ULL;
+        u.f -= 1.0;
+        return (int)(u.u >> 52) - (1023 + 1022);
+    }
+
+    return (int)(absx >> 52) - 1023;
+}
+
+
+/* Returns the unbiased binary exponent of x.
+     x == 0       -> FP_ILOGB0
+     x == +/-inf  -> INT_MAX
+     x is NaN     -> FP_ILOGBNAN */
+int ilogbf (float x)
+{
+    union{ float f; cl_uint u;} pun;
+    cl_uint mag;
+
+    pun.f = x;
+    mag = pun.u & 0x7fffffff;
+
+    /* normal numbers occupy [0x00800000, 0x7f800000) */
+    if( mag >= 0x00800000U && mag < 0x7f800000U )
+        return (mag >> 23) - 127;
+
+    if( 0 == mag )
+        return FP_ILOGB0;
+
+    if( 0x7f800000U == mag )
+        return INT_MAX;
+
+    if( mag > 0x7f800000 )
+        return FP_ILOGBNAN;
+
+    // subnormal: overlay the fraction on 1.0f, subtract 1.0f so the FPU
+    // normalises it, then read back the exponent of the difference.
+    pun.u = mag | 0x3f800000U;
+    pun.f -= 1.0f;
+    return (pun.u >> 23) - (127 + 126);
+}
+
+int ilogbl (long double x)
+{
+    union
+    {
+        long double f;
+        struct{ cl_ulong m; cl_ushort sexp; }u;
+    } u;
+    u.f = x;
+
+    int exp = u.u.sexp & 0x7fff;
+    if( 0 == exp )
+    {
+        if( 0 == u.u.m )
+            return FP_ILOGB0;
+
+        //subnormal
+        u.u.sexp = 0x3fff;
+        u.f -= 1.0f;
+        exp = u.u.sexp & 0x7fff;
+
+        return exp - (0x3fff + 0x3ffe);
+    }
+    else if( 0x7fff == exp )
+    {
+        if( u.u.m & CL_LONG_MAX )
+            return FP_ILOGBNAN;
+
+        return INT_MAX;
+    }
+
+    return exp - 0x3fff;
+}
+
+
+
+///////////////////////////////////////////////////////////////////
+//
+//                 fmax, fmin, fmaxf, fminf
+//
+///////////////////////////////////////////////////////////////////
+
+/* Stores the raw 32-bit pattern of fx into *ux. */
+static void GET_BITS_SP32(float fx, unsigned int* ux)
+{
+    volatile union {unsigned int bits; float value;} pun;
+
+    pun.value = fx;
+    *ux = pun.bits;
+}
+/* static void GET_BITS_SP32(float fx, unsigned int* ux) */
+/* { */
+/*     volatile union {float f; unsigned int i;} _bitsy; */
+/*     _bitsy.f = (fx); */
+/*     *ux = _bitsy.i; */
+/* } */
+/* Stores into *fx the float whose raw 32-bit pattern is ux. */
+static void PUT_BITS_SP32(unsigned int ux, float* fx)
+{
+    volatile union {unsigned int bits; float value;} pun;
+
+    pun.bits = ux;
+    *fx = pun.value;
+}
+/* static void PUT_BITS_SP32(unsigned int ux, float* fx) */
+/* { */
+/*     volatile union {float f; unsigned int i;} _bitsy; */
+/*     _bitsy.i = (ux); */
+/*     *fx = _bitsy.f; */
+/* } */
+/* Stores the raw 64-bit pattern of dx into *lx. */
+static void GET_BITS_DP64(double dx, unsigned __int64* lx)
+{
+    volatile union {unsigned __int64 bits; double value;} pun;
+
+    pun.value = dx;
+    *lx = pun.bits;
+}
+/* Stores into *dx the double whose raw 64-bit pattern is lx. */
+static void PUT_BITS_DP64(unsigned __int64 lx, double* dx)
+{
+    volatile union {unsigned __int64 bits; double value;} pun;
+
+    pun.bits = lx;
+    *dx = pun.value;
+}
+
+#if 0
+int SIGNBIT_DP64(double x )
+{
+    int hx;
+    _GET_HIGH_WORD(hx,x);
+    return((hx>>31));
+}
+#endif
+
+/* fmax(x, y) returns the larger (more positive) of x and y.
+   A NaN is treated as a missing value: if exactly one argument is NaN the
+   other one is returned; if both are NaN, x is returned.
+   (A NaN in x makes the >= comparison false, which selects y.) */
+double fmax(double x, double y)
+{
+    if( isnan(y) || x >= y )
+        return x;
+
+    return y;
+}
+
+
+/* fmin(x, y) returns the smaller (more negative) of x and y.
+   A NaN is treated as a missing value: if exactly one argument is NaN the
+   other one is returned; if both are NaN, x is returned.
+   (A NaN in x makes the <= comparison false, which selects y.) */
+double fmin(double x, double y)
+{
+    if( isnan(y) || x <= y )
+        return x;
+
+    return y;
+}
+
+
+/* fmaxf(x, y) returns the larger (more positive) of x and y.
+   A NaN is treated as a missing value: if exactly one argument is NaN the
+   other one is returned; if both are NaN, x is returned. */
+float fmaxf( float x, float y )
+{
+    if( isnan(y) || x >= y )
+        return x;
+
+    return y;
+}
+
+/* fminf(x, y) returns the smaller (more negative) of x and y.
+   A NaN is treated as a missing value: if exactly one argument is NaN the
+   other one is returned; if both are NaN, x is returned. */
+float fminf(float x, float y)
+{
+    if( isnan(y) || x <= y )
+        return x;
+
+    return y;
+}
+
+long double scalblnl(long double x, long n)
+{
+    union
+    {
+        long double d;
+        struct{ cl_ulong m; cl_ushort sexp;}u;
+    }u;
+    u.u.m = CL_LONG_MIN;
+
+    if( x == 0.0L || n < -2200)
+        return copysignl( 0.0L, x );
+
+    if( n > 2200 )
+        return INFINITY;
+
+    if( n < 0 )
+    {
+        u.u.sexp = 0x3fff - 1022;
+        while( n <= -1022 )
+        {
+            x *= u.d;
+            n += 1022;
+        }
+        u.u.sexp = 0x3fff + n;
+        x *= u.d;
+        return x;
+    }
+
+    if( n > 0 )
+    {
+        u.u.sexp = 0x3fff + 1023;
+        while( n >= 1023 )
+        {
+            x *= u.d;
+            n -= 1023;
+        }
+        u.u.sexp = 0x3fff + n;
+        x *= u.d;
+        return x;
+    }
+
+    return x;
+}
+
+///////////////////////////////////////////////////////////////////
+//
+//                          log2
+//
+///////////////////////////////////////////////////////////////////
+const static cl_double log_e_base2   = 1.4426950408889634074;
+const static cl_double log_10_base2  = 3.3219280948873623478;
+
+//double log10(double x);
+
+/* Base-2 logarithm, computed as log(x) scaled by log2(e). */
+double log2(double x)
+{
+    const double log2_of_e = 1.44269504088896340735992468100189214;
+
+    return log2_of_e * log(x);
+}
+
+/* Base-2 logarithm for long double. The natural log is evaluated with the
+   double-precision log(), then scaled by log2(e) in long double. */
+long double log2l(long double x)
+{
+    const long double log2_of_e = 1.44269504088896340735992468100189214L;
+
+    return log2_of_e * log(x);
+}
+
+///////////////////////////////////////////////////////////////////
+//
+//                  misc functions
+//
+///////////////////////////////////////////////////////////////////
+
+/*
+// This function is commented out because the Windows implementation should never call munmap.
+// If it is calling it, we have a bug. Please file a bugzilla.
+int munmap(void *addr, size_t len)
+{
+// FIXME: this is not correct.  munmap is like free()    http://www.opengroup.org/onlinepubs/7990989775/xsh/munmap.html
+
+    return (int)VirtualAlloc( (LPVOID)addr, len,
+                  MEM_COMMIT|MEM_RESERVE, PAGE_NOACCESS );
+}
+*/
+
+/* Returns the current QueryPerformanceCounter() reading as raw ticks.
+   Convert a pair of readings to a duration with SubtractTime(). */
+uint64_t ReadTime( void )
+{
+    LARGE_INTEGER ticks;
+
+    QueryPerformanceCounter( &ticks );
+
+    return (uint64_t) ticks.QuadPart;
+}
+
+/* Converts the difference between two ReadTime() values into nanoseconds.
+   The counter frequency is queried on the first call and cached in a
+   function-local static. */
+double SubtractTime( uint64_t endTime, uint64_t startTime )
+{
+    static double ticksPerSecond = 0.0;
+    double elapsedTicks;
+
+    if( 0.0 == ticksPerSecond )
+    {
+        LARGE_INTEGER freq;
+        QueryPerformanceFrequency( &freq );
+        ticksPerSecond = (double) freq.QuadPart;
+    }
+
+    elapsedTicks = (double)( endTime - startTime );
+
+    return elapsedTicks / ticksPerSecond * 1e9;
+}
+
+/* Returns a positive quiet NaN (bit pattern 0x7fc00000).
+
+   IEEE 754 single-precision layout, least significant field first:
+       unsigned int mantissa:  22;
+       unsigned int quiet_nan:  1;
+       unsigned int exponent:   8;
+       unsigned int negative:   1;
+
+   The bits are reinterpreted through a union, like the other helpers in
+   this file, rather than by dereferencing a cast pointer. */
+float make_nan()
+{
+    union {
+        int32_t i;
+        float   f;
+    } bits;
+
+    bits.i = 0x7fc00000;    /* exponent all ones + quiet bit */
+    return bits.f;
+}
+
+/* Returns a quiet single-precision NaN. The decimal number in str (parsed
+   with atoi) is placed in the low 22 payload bits; it is masked so that it
+   can never touch the sign, exponent or quiet bit. A NULL str is treated
+   like an empty string. */
+float nanf( const char* str)
+{
+    union { float f; cl_uint u; } bits;
+
+    if( NULL == str )
+        str = "";
+
+    bits.u = 0x7fc00000U | ( (cl_uint) atoi( str ) & 0x003fffffU );
+
+    return bits.f;
+}
+
+
+/* Returns a quiet double-precision NaN. The decimal number in str (parsed
+   with atoi) is placed in the low 51 payload bits; it is masked so that it
+   can never touch the sign, exponent or quiet bit. A NULL str is treated
+   like an empty string. */
+double nan( const char* str)
+{
+    union { double f; cl_ulong u; } bits;
+
+    if( NULL == str )
+        str = "";
+
+    bits.u = 0x7ff8000000000000ULL | ( (cl_ulong) atoi( str ) & 0x0007ffffffffffffULL );
+
+    return bits.f;
+}
+
+// double check this implementatation
+long double nanl( const char* str)
+{
+    union
+    {
+        long double f;
+        struct { cl_ulong m; cl_ushort sexp; }u;
+    }u;
+    u.u.sexp = 0x7fff;
+    u.u.m = 0x8000000000000000ULL | atoi( str );
+
+    return u.f;
+}
+
+/* Rounds x toward zero to an integral value. Inputs with |x| >= 2^52,
+   infinities and NaN are returned unchanged. */
+double trunc(double x)
+{
+    cl_long whole;
+
+    if( !( fabs(x) < 4503599627370496.0 /* 0x1.0p52 */ ) )
+        return x;
+
+    whole = x;      /* floating -> integer conversion drops the fraction */
+
+    /* copysign keeps the sign when the result is zero (e.g. -0.3 -> -0.0) */
+    return copysign( (double) whole, x );
+}
+
+/* Rounds x toward zero to an integral value. Inputs with |x| >= 2^23,
+   infinities and NaN are returned unchanged. */
+float  truncf(float x)
+{
+    cl_int whole;
+
+    if( !( fabsf(x) < 8388608.0f /* 0x1.0p23f */ ) )
+        return x;
+
+    whole = x;      /* floating -> integer conversion drops the fraction */
+
+    /* copysignf keeps the sign when the result is zero (e.g. -0.3f -> -0.0f) */
+    return copysignf( (float) whole, x );
+}
+
+/* Rounds x to the nearest integer, halfway cases away from zero, and returns
+   it as a long. Results outside the range of long saturate to LONG_MAX /
+   LONG_MIN instead of performing an out-of-range conversion.
+   NOTE(review): a NaN argument is not handled specially. */
+long lround(double x)
+{
+    double absx = fabs(x);
+
+    if( absx < 0.5 )
+        return 0;
+
+    if( absx < 4503599627370496.0 /* 0x1.0p52 */)
+    {
+        cl_long rounded;
+
+        absx += 0.5;
+        rounded = absx;     /* truncation of |x| + 0.5 */
+        absx = rounded;
+        x = copysign( absx, x );
+    }
+
+    if( x >= (double) LONG_MAX )
+        return LONG_MAX;
+
+    if( x <= (double) LONG_MIN )
+        return LONG_MIN;
+
+    return (long) x;
+}
+
+/* Rounds x to the nearest integer, halfway cases away from zero, and returns
+   it as a long. Results outside the range of long saturate to LONG_MAX /
+   LONG_MIN instead of performing an out-of-range conversion.
+   NOTE(review): a NaN argument is not handled specially. */
+long lroundf(float x)
+{
+    float absx = fabsf(x);
+
+    if( absx < 0.5f )
+        return 0;
+
+    if( absx < 8388608.0f /* 0x1.0p23f */ )
+    {
+        cl_int rounded;
+
+        absx += 0.5f;
+        rounded = absx;     /* truncation of |x| + 0.5 */
+        absx = rounded;
+        x = copysignf(  absx, x );
+    }
+
+    if( x >= (float) LONG_MAX )
+        return LONG_MAX;
+
+    if( x <= (float) LONG_MIN )
+        return LONG_MIN;
+
+    return (long) x;
+}
+
+/* Rounds x to the nearest integral value, halfway cases away from zero.
+   |x| < 0.5 yields a zero with the sign of x; |x| >= 2^52, infinities and
+   NaN are returned unchanged. */
+double round(double x)
+{
+    double mag = fabs(x);
+
+    if( mag < 0.5 )
+        return copysign( 0.0, x);
+
+    if( !( mag < 4503599627370496.0 /* 0x1.0p52 */ ) )
+        return x;
+
+    mag += 0.5;
+    mag = (double)(cl_long) mag;    /* truncate |x| + 0.5 */
+
+    return copysign( mag, x );
+}
+
+/* Rounds x to the nearest integral value, halfway cases away from zero.
+   |x| < 0.5 yields a zero with the sign of x; |x| >= 2^23, infinities and
+   NaN are returned unchanged. */
+float  roundf(float x)
+{
+    float mag = fabsf(x);
+
+    if( mag < 0.5f )
+        return copysignf( 0.0f, x);
+
+    if( !( mag < 8388608.0f /* 0x1.0p23f */ ) )
+        return x;
+
+    mag += 0.5f;
+    mag = (float)(cl_int) mag;      /* truncate |x| + 0.5 */
+
+    return copysignf( mag, x );
+}
+
+long double roundl(long double x)
+{
+    long double absx = fabsl(x);
+
+    if( absx < 0.5L )
+        return copysignl( 0.0L, x);
+
+    if( absx < 9223372036854775808.0L /*0x1.0p63L*/ )
+    {
+        absx += 0.5L;
+        cl_ulong rounded = absx;
+        absx = rounded;
+        x = copysignl( absx, x );
+    }
+
+    return x;
+}
+
+/* Returns 1 if the sign bit of x is set (including -0.0 and negative NaNs),
+   0 otherwise. */
+int signbit(double x)
+{
+    union
+    {
+        cl_ulong bits;
+        double value;
+    }pun;
+
+    pun.value = x;
+
+    return pun.bits >> 63;
+}
+
+/* Returns 1 if the sign bit of x is set (including -0.0f and negative NaNs),
+   0 otherwise. */
+int signbitf(float x)
+{
+    union
+    {
+        cl_uint bits;
+        float value;
+    }pun;
+
+    pun.value = x;
+
+    return pun.bits >> 31;
+}
+
+/* Cube root of x: pow() is applied to |x| in double precision and the sign
+   of x is restored afterwards. */
+float cbrtf( float x )
+{
+    const double magnitude = fabs( (double) x );
+    float root = pow( magnitude, 1.0 / 3.0 );
+
+    return copysignf( root, x );
+}
+
+/* Cube root of x: pow() is applied to |x| and the sign of x is restored
+   afterwards. */
+double cbrt( double x )
+{
+    const double root = pow( fabs( x ), 1.0 / 3.0 );
+
+    return copysign( root, x );
+}
+
+/* Reinterprets the 32 bits of ix as a float (no numeric conversion). */
+float int2float (int32_t ix)
+{
+    union {
+        int32_t raw;
+        float   value;
+    } pun;
+
+    pun.raw = ix;
+
+    return pun.value;
+}
+
+/* Reinterprets the 32 bits of fx as an int32_t (no numeric conversion). */
+int32_t float2int (float   fx)
+{
+    union {
+        int32_t raw;
+        float   value;
+    } pun;
+
+    pun.value = fx;
+
+    return pun.raw;
+}
+
+#if defined(_MSC_VER) && !defined(_WIN64)
+/** Returns the number of leading 0-bits in pattern, starting at the most
+    significant bit position. This variant uses _BitScanReverse to find the
+    highest set bit; for pattern == 0 it returns the width of int in bits.
+*/
+int __builtin_clz(unsigned int pattern)
+{
+    unsigned long msb;
+
+    if( !_BitScanReverse( &msb, pattern ) )
+        return 8*sizeof(int);
+
+    return 8*sizeof(int) - 1 - msb;
+}
+#else
+/** Returns the number of leading 0-bits in pattern, starting at the most
+    significant bit position. Portable variant: a binary search that halves
+    the window (16, 8, 4, 2, 1 bits) at every step. Returns 32 for 0.
+*/
+int __builtin_clz(unsigned int pattern)
+{
+   int zeros = 31;
+   int step;
+
+   if (pattern == 0u) {
+       return 32;
+   }
+
+   for (step = 16; step >= 1; step >>= 1) {
+       /* any bit at or above 'step'? then the answer lies in the upper part */
+       if (pattern >> step) {
+           pattern >>= step;
+           zeros -= step;
+       }
+   }
+
+   return zeros;
+}
+
+#endif //defined(_MSC_VER) && !defined(_WIN64)
+
+#include <intrin.h>
+#include <emmintrin.h>
+/* Rounds x to an integer and returns it as a long. The rounding is done by
+   adding and subtracting 2^52, so the floating-point addition decides how
+   ties are resolved. Results outside the range of long saturate to
+   LONG_MAX / LONG_MIN instead of performing an out-of-range conversion.
+   NOTE(review): a NaN argument is not handled specially. */
+long int lrint (double x)
+{
+    double absx = fabs(x);
+
+    if( x >= (double) LONG_MAX )
+        return LONG_MAX;
+
+    if( x <= (double) LONG_MIN )
+        return LONG_MIN;
+
+    if( absx < 4503599627370496.0 /* 0x1.0p52 */ )
+    {
+        double magic = copysign( 4503599627370496.0 /* 0x1.0p52 */, x );
+        double rounded = x + magic;
+        rounded -= magic;
+        return (long int) rounded;
+    }
+
+    /* already integral */
+    return (long int) x;
+}
+
+/* Rounds x to an integer and returns it as a long. The rounding is done by
+   adding and subtracting 2^23, so the floating-point addition decides how
+   ties are resolved. Results outside the range of long saturate to
+   LONG_MAX / LONG_MIN instead of performing an out-of-range conversion.
+   NOTE(review): a NaN argument is not handled specially. */
+long int lrintf (float x)
+{
+    float absx = fabsf(x);
+
+    if( x >= (float) LONG_MAX )
+        return LONG_MAX;
+
+    if( x <= (float) LONG_MIN )
+        return LONG_MIN;
+
+    if( absx < 8388608.0f /* 0x1.0p23f */ )
+    {
+        float magic = copysignf( 8388608.0f /* 0x1.0p23f */, x );
+        float rounded = x + magic;
+        rounded -= magic;
+        return (long int) rounded;
+    }
+
+    /* already integral */
+    return (long int) x;
+}
+
+/* Suspends the calling thread for at least usec microseconds, rounded up to
+   whole milliseconds because Sleep() takes milliseconds. Always returns 0. */
+int usleep(int usec)
+{
+    const int millis = (usec + 999) / 1000;
+
+    Sleep( millis );
+
+    return 0;
+}
+
+/* Returns the subset of the FE_* flags in excepts that are currently raised,
+   by translating the _SW_* bits reported by _statusfp() into FE_* bits. */
+int fetestexcept(int excepts)
+{
+    const unsigned int status = _statusfp();
+    int raised = 0;
+
+    if( status & _SW_INEXACT )
+        raised |= FE_INEXACT;
+    if( status & _SW_UNDERFLOW )
+        raised |= FE_UNDERFLOW;
+    if( status & _SW_OVERFLOW )
+        raised |= FE_OVERFLOW;
+    if( status & _SW_ZERODIVIDE )
+        raised |= FE_DIVBYZERO;
+    if( status & _SW_INVALID )
+        raised |= FE_INVALID;
+
+    return excepts & raised;
+}
+
+/* Clears the floating-point status word via _clearfp() and returns 0.
+   The excepts mask is not consulted by this implementation. */
+int feclearexcept(int excepts)
+{
+    (void) excepts;
+
+    _clearfp();
+
+    return 0;
+}
+
+#endif //defined(_WIN32)
--- a/test_conformance/compatibility/test_common/harness/mt19937.c
+++ b/test_conformance/compatibility/test_common/harness/mt19937.c
@@ -0,0 +1,274 @@
+/*
+   A C-program for MT19937, with initialization improved 2002/1/26.
+   Coded by Takuji Nishimura and Makoto Matsumoto.
+
+   Before using, initialize the state by using init_genrand(seed)
+   or init_by_array(init_key, key_length).
+
+   Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura,
+   All rights reserved.
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+     1. Redistributions of source code must retain the above copyright
+        notice, this list of conditions and the following disclaimer.
+
+     2. Redistributions in binary form must reproduce the above copyright
+        notice, this list of conditions and the following disclaimer in the
+        documentation and/or other materials provided with the distribution.
+
+     3. The names of its contributors may not be used to endorse or promote
+        products derived from this software without specific prior written
+        permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+   Any feedback is very welcome.
+   http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html
+   email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space)
+
+   Modifications for use in OpenCL by Ian Ollmann, Apple Inc.
+
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "mt19937.h"
+#include "mingw_compat.h"
+
+#ifdef __SSE2__
+    #include <emmintrin.h>
+#endif
+
+/* Allocates size bytes aligned to alignment bytes using the platform's
+   aligned allocator. Returns NULL on failure. Release the block with
+   align_free(). */
+static void * align_malloc(size_t size, size_t alignment)
+{
+#if defined(_WIN32) && defined(_MSC_VER)
+    return _aligned_malloc(size, alignment);
+#elif  defined(__linux__) || defined (linux) || defined(__APPLE__)
+    void * block = NULL;
+
+    /* posix_memalign reports failure through a non-zero return value */
+    return posix_memalign(&block, alignment, size) ? NULL : block;
+#elif defined(__MINGW32__)
+    return __mingw_aligned_malloc(size, alignment);
+#else
+    #error "Please add support OS for aligned malloc"
+#endif
+}
+
+/* Releases a block obtained from align_malloc() with the matching platform
+   deallocator. */
+static void   align_free(void * ptr)
+{
+    /* No `return <expr>;` here: returning an expression from a void
+       function is a constraint violation in C. */
+#if defined(_WIN32) && defined(_MSC_VER)
+    _aligned_free(ptr);
+#elif  defined(__linux__) || defined (linux) || defined(__APPLE__)
+    free(ptr);
+#elif defined(__MINGW32__)
+    __mingw_aligned_free(ptr);
+#else
+    #error "Please add support OS for aligned free"
+#endif
+}
+
+
+/* Period parameters */
+#define N 624   /* vector code requires multiple of 4 here */
+#define M 397
+#define MATRIX_A    (cl_uint) 0x9908b0dfUL   /* constant vector a */
+#define UPPER_MASK  (cl_uint) 0x80000000UL /* most significant w-r bits */
+#define LOWER_MASK  (cl_uint) 0x7fffffffUL /* least significant r bits */
+
+/* Private generator state behind the opaque MTdata handle.
+   init_genrand() allocates it with 16-byte alignment. */
+typedef struct _MTdata
+{
+    cl_uint mt[N];      /* the N-word state vector */
+#ifdef __SSE2__
+    cl_uint cache[N];   /* SSE2 builds only: tempered outputs, filled in bulk by genrand_int32() */
+#endif
+    cl_int  mti;        /* index of the next word to hand out; N means "regenerate first" */
+}_MTdata;
+
+/* Allocates a generator and seeds its state vector mt[N] from s.
+   Returns NULL if the allocation fails. Release with free_mtdata(). */
+MTdata init_genrand(cl_uint s)
+{
+    MTdata state = (MTdata) align_malloc( sizeof( _MTdata ), 16 );
+    cl_uint *mt;
+    cl_uint prev;
+    int i;
+
+    if( NULL == state )
+        return state;
+
+    mt = state->mt;
+    mt[0] = s;
+
+    /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for the multiplier.       */
+    /* In earlier versions the MSBs of the seed affected only the    */
+    /* MSBs of mt[] (2002/01/09 modified by Makoto Matsumoto).       */
+    /* The cast truncates to 32 bits on machines with a wider long.  */
+    for( i = 1; i < N; i++ )
+    {
+        prev = mt[i-1];
+        mt[i] = (cl_uint)
+        (1812433253UL * (prev ^ (prev >> 30)) + i);
+    }
+
+    /* i == N here, so the first draw regenerates the whole state */
+    state->mti = i;
+
+    return state;
+}
+
+/* Releases a generator created by init_genrand(). NULL is ignored. */
+void    free_mtdata( MTdata d )
+{
+    if( NULL == d )
+        return;
+
+    align_free(d);
+}
+
+/* generates a random number on [0,0xffffffff]-interval
+
+   d must be a non-NULL generator; it is dereferenced unconditionally.
+
+   When d->mti has reached N, all N state words are regenerated in one go
+   before a value is handed out. On SSE2 builds the regeneration is partly
+   vectorised four words at a time, and the tempering step is applied to the
+   whole block up front and stored in d->cache; non-SSE2 builds temper each
+   word as it is returned.
+
+   NOTE(review): on SSE2 builds the static vector constants are filled in
+   lazily, guarded only by the `init` flag; no locking is visible here, so
+   confirm how first use from several threads is expected to behave. */
+cl_uint genrand_int32( MTdata d)
+{
+    /* mag01[x] = x * MATRIX_A  for x=0,1 */
+    static const cl_uint mag01[2]={0x0UL, MATRIX_A};
+#ifdef __SSE2__
+    static volatile int init = 0;
+    static union{ __m128i v; cl_uint s[4]; } upper_mask, lower_mask, one, matrix_a, c0, c1;
+#endif
+
+
+    cl_uint *mt = d->mt;
+    cl_uint y;
+
+    if (d->mti == N)
+    { /* generate N words at one time */
+        int kk;
+
+#ifdef __SSE2__
+        /* one-time broadcast of the scalar constants into 4-lane vectors */
+        if( 0 == init )
+        {
+            upper_mask.s[0] = upper_mask.s[1] = upper_mask.s[2] = upper_mask.s[3] = UPPER_MASK;
+            lower_mask.s[0] = lower_mask.s[1] = lower_mask.s[2] = lower_mask.s[3] = LOWER_MASK;
+            one.s[0] = one.s[1] = one.s[2] = one.s[3] = 1;
+            matrix_a.s[0] = matrix_a.s[1] = matrix_a.s[2] = matrix_a.s[3] = MATRIX_A;
+            c0.s[0] = c0.s[1] = c0.s[2] = c0.s[3] = (cl_uint) 0x9d2c5680UL;
+            c1.s[0] = c1.s[1] = c1.s[2] = c1.s[3] = (cl_uint) 0xefc60000UL;
+            init = 1;
+        }
+#endif
+
+        kk = 0;
+#ifdef __SSE2__
+        // vector loop
+        // (first part of the state: the partner word mt[kk+M] still holds its old value)
+        for( ; kk + 4 <= N-M; kk += 4 )
+        {
+            __m128i vy = _mm_or_si128(  _mm_and_si128( _mm_load_si128( (__m128i*)(mt + kk) ), upper_mask.v ),
+                                        _mm_and_si128( _mm_loadu_si128( (__m128i*)(mt + kk + 1) ), lower_mask.v ));        //  ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK))
+
+            __m128i mask = _mm_cmpeq_epi32( _mm_and_si128( vy, one.v), one.v );                                         // y & 1 ? -1 : 0
+            __m128i vmag01 = _mm_and_si128( mask, matrix_a.v );                                                         // y & 1 ? MATRIX_A, 0    =  mag01[y & (cl_uint) 0x1UL]
+            __m128i vr = _mm_xor_si128( _mm_loadu_si128( (__m128i*)(mt + kk + M)), (__m128i) _mm_srli_epi32( vy, 1 ) );    // mt[kk+M] ^ (y >> 1)
+            vr = _mm_xor_si128( vr, vmag01 );                                                                           // mt[kk+M] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL]
+            _mm_store_si128( (__m128i*) (mt + kk ), vr );
+        }
+#endif
+        /* scalar version of the first part (on SSE2 builds: only the leftover words) */
+        for ( ;kk<N-M;kk++) {
+            y = (cl_uint) ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK));
+            mt[kk] = mt[kk+M] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL];
+        }
+
+#ifdef __SSE2__
+        // advance to next aligned location
+        for (;kk<N-1 && (kk & 3);kk++) {
+            y = (cl_uint) ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK));
+            mt[kk] = mt[kk+(M-N)] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL];
+        }
+
+        // vector loop
+        // (second part of the state: the partner word mt[kk+M-N] was already regenerated above)
+        for( ; kk + 4 <= N-1; kk += 4 )
+        {
+            __m128i vy = _mm_or_si128(  _mm_and_si128( _mm_load_si128( (__m128i*)(mt + kk) ), upper_mask.v ),
+                                        _mm_and_si128( _mm_loadu_si128( (__m128i*)(mt + kk + 1) ), lower_mask.v ));        //  ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK))
+
+            __m128i mask = _mm_cmpeq_epi32( _mm_and_si128( vy, one.v), one.v );                                         // y & 1 ? -1 : 0
+            __m128i vmag01 = _mm_and_si128( mask, matrix_a.v );                                                         // y & 1 ? MATRIX_A, 0    =  mag01[y & (cl_uint) 0x1UL]
+            __m128i vr = _mm_xor_si128( _mm_loadu_si128( (__m128i*)(mt + kk + M - N)), _mm_srli_epi32( vy, 1 ) );          // mt[kk+M-N] ^ (y >> 1)
+            vr = _mm_xor_si128( vr, vmag01 );                                                                           // mt[kk+M-N] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL]
+            _mm_store_si128( (__m128i*) (mt + kk ), vr );
+        }
+#endif
+
+        /* scalar version of the second part (on SSE2 builds: only the leftover words) */
+        for (;kk<N-1;kk++) {
+            y = (cl_uint) ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK));
+            mt[kk] = mt[kk+(M-N)] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL];
+        }
+        /* the last word wraps around and pairs with mt[0] */
+        y = (cl_uint)((mt[N-1]&UPPER_MASK)|(mt[0]&LOWER_MASK));
+        mt[N-1] = mt[M-1] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL];
+
+#ifdef __SSE2__
+        // Do the tempering ahead of time in vector code
+        for( kk = 0; kk + 4 <= N; kk += 4 )
+        {
+            __m128i vy = _mm_load_si128( (__m128i*)(mt + kk ) );                            // y = mt[k];
+            vy = _mm_xor_si128( vy, _mm_srli_epi32( vy, 11 ) );                             // y ^= (y >> 11);
+            vy = _mm_xor_si128( vy, _mm_and_si128( _mm_slli_epi32( vy, 7 ), c0.v) );        // y ^= (y << 7) & (cl_uint) 0x9d2c5680UL;
+            vy = _mm_xor_si128( vy, _mm_and_si128( _mm_slli_epi32( vy, 15 ), c1.v) );       // y ^= (y << 15) & (cl_uint) 0xefc60000UL;
+            vy = _mm_xor_si128( vy, _mm_srli_epi32( vy, 18 ) );                             // y ^= (y >> 18);
+            _mm_store_si128( (__m128i*)(d->cache+kk), vy );
+        }
+#endif
+
+        d->mti = 0;
+    }
+#ifdef __SSE2__
+    /* already tempered during regeneration */
+    y = d->cache[d->mti++];
+#else
+    y = mt[d->mti++];
+
+    /* Tempering */
+    y ^= (y >> 11);
+    y ^= (y << 7) & (cl_uint) 0x9d2c5680UL;
+    y ^= (y << 15) & (cl_uint) 0xefc60000UL;
+    y ^= (y >> 18);
+#endif
+
+
+    return y;
+}
+
+/* generates a random number on [0,0xffffffffffffffffULL]-interval from two
+   consecutive 32-bit draws: the first draw becomes the high word, the second
+   the low word.
+   The draws are made in separate statements because the operands of `|` are
+   not sequenced relative to each other; calling genrand_int32() twice inside
+   one expression would let the compiler choose which draw lands in which
+   half, making seeded sequences differ between compilers. */
+cl_ulong genrand_int64( MTdata d)
+{
+    cl_ulong hi = genrand_int32(d);
+    cl_ulong lo = genrand_int32(d);
+
+    return (hi << 32) | lo;
+}
+
+/* generates a random number on [0,1]-real-interval
+   (one 32-bit draw divided by 2^32-1) */
+double genrand_real1(MTdata d)
+{
+    const double scale = 1.0/4294967295.0;
+
+    return genrand_int32(d) * scale;
+}
+
+/* generates a random number on [0,1)-real-interval
+   (one 32-bit draw divided by 2^32) */
+double genrand_real2(MTdata d)
+{
+    const double scale = 1.0/4294967296.0;
+
+    return genrand_int32(d) * scale;
+}
+
+/* generates a random number on (0,1)-real-interval
+   (one 32-bit draw, offset by 0.5, divided by 2^32) */
+double genrand_real3(MTdata d)
+{
+    const double scale = 1.0/4294967296.0;
+    const double draw = (double) genrand_int32(d);
+
+    return (draw + 0.5) * scale;
+}
+
+/* generates a random number on [0,1) with 53-bit resolution:
+   the top 27 bits of one draw and the top 26 bits of the next are combined
+   into a 53-bit integer, then scaled by 2^-53 */
+double genrand_res53(MTdata d)
+{
+    const unsigned long hi27 = genrand_int32(d) >> 5;
+    const unsigned long lo26 = genrand_int32(d) >> 6;
+
+    return (hi27 * 67108864.0 + lo26) * (1.0/9007199254740992.0);
+}
--- a/test_conformance/compatibility/test_common/harness/mt19937.h
+++ b/test_conformance/compatibility/test_common/harness/mt19937.h
@@ -0,0 +1,99 @@
+
+/*
+ *  mt19937.h
+ *
+ *  Mersenne Twister.
+ *
+   A C-program for MT19937, with initialization improved 2002/1/26.
+   Coded by Takuji Nishimura and Makoto Matsumoto.
+
+   Before using, initialize the state by using init_genrand(seed)
+   or init_by_array(init_key, key_length).
+
+   Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura,
+   All rights reserved.
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+     1. Redistributions of source code must retain the above copyright
+        notice, this list of conditions and the following disclaimer.
+
+     2. Redistributions in binary form must reproduce the above copyright
+        notice, this list of conditions and the following disclaimer in the
+        documentation and/or other materials provided with the distribution.
+
+     3. The names of its contributors may not be used to endorse or promote
+        products derived from this software without specific prior written
+        permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+   Any feedback is very welcome.
+   http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html
+   email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space)
+ */
+
+#ifndef MT19937_H
+#define MT19937_H   1
+
+#if defined( __APPLE__ )
+    #include <OpenCL/cl_platform.h>
+#else
+    #include <CL/cl_platform.h>
+#endif
+
+#ifdef __cplusplus
+    extern "C" {
+#endif
+
+/*
+ *      Interfaces here have been modified from original sources so that they
+ *      are safe to call reentrantly, so long as a different MTdata is used
+ *      on each thread.
+ */
+
+typedef struct _MTdata  *MTdata;
+
+/* Create the random number generator with seed */
+MTdata init_genrand( cl_uint /*seed*/ );
+
+/* release memory used by a MTdata private data */
+void   free_mtdata( MTdata /*data*/ );
+
+/* generates a random number on [0,0xffffffff]-interval */
+cl_uint genrand_int32( MTdata /*data*/);
+
+/* generates a random number on [0,0xffffffffffffffffULL]-interval */
+cl_ulong genrand_int64( MTdata /*data*/);
+
+/* generates a random number on [0,1]-real-interval */
+double genrand_real1( MTdata /*data*/);
+
+/* generates a random number on [0,1)-real-interval */
+double genrand_real2( MTdata /*data*/);
+
+/* generates a random number on (0,1)-real-interval */
+double genrand_real3( MTdata /*data*/);
+
+/* generates a random number on [0,1) with 53-bit resolution*/
+double genrand_res53( MTdata /*data*/ );
+
+
+#ifdef __cplusplus
+    }
+#endif
+
+#endif  /* MT19937_H */
--- a/test_conformance/compatibility/test_common/harness/ref_counting.h
+++ b/test_conformance/compatibility/test_common/harness/ref_counting.h
@@ -0,0 +1,49 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef _ref_counting_h
+#define _ref_counting_h
+
+// Captures the current reference count of the OpenCL object `c`.
+//   c       - identifier of the object; also used to build the name of the local
+//             variable `<c>_refCount` that this macro declares.
+//   type    - middle part of the query function name (clGet<type>Info).
+//   bigType - middle part of the query token (CL_<bigType>_REFERENCE_COUNT).
+// The expansion is a declaration followed by statements, so it must appear where a
+// declaration is legal.  It assigns to a variable named `error` that must already exist
+// in the enclosing scope and hands it to test_error().
+#define MARK_REF_COUNT_BASE( c, type, bigType ) \
+    cl_uint c##_refCount; \
+    error = clGet##type##Info( c, CL_##bigType##_REFERENCE_COUNT, sizeof( c##_refCount ), &c##_refCount, NULL ); \
+    test_error( error, "Unable to check reference count for " #type );
+
+// Re-queries the reference count of the OpenCL object `c` and compares it with the value
+// stored in `<c>_refCount` (as declared by MARK_REF_COUNT_BASE for the same identifier).
+//   c       - identifier of the object; `<c>_refCount` must be in scope, and the macro
+//             declares a new local `<c>_refCount_new`.
+//   type    - middle part of the query function name (clGet<type>Info).
+//   bigType - middle part of the query token (CL_<bigType>_REFERENCE_COUNT).
+// If the two counts differ, an error is logged and the ENCLOSING FUNCTION returns -1.
+// The macro assigns to a variable named `error` that must already exist in scope.
+// The counts are cl_uint, so they are printed with %u.
+#define TEST_REF_COUNT_BASE( c, type, bigType ) \
+    cl_uint c##_refCount_new; \
+    error = clGet##type##Info( c, CL_##bigType##_REFERENCE_COUNT, sizeof( c##_refCount_new ), &c##_refCount_new, NULL ); \
+    test_error( error, "Unable to check reference count for " #type ); \
+    if( c##_refCount != c##_refCount_new ) \
+    {    \
+        log_error( "ERROR: Reference count for " #type " changed! (was %u, now %u)\n", c##_refCount, c##_refCount_new );    \
+        return -1; \
+    }
+
+// Convenience wrappers: each MARK_/TEST_ pair instantiates the two base macros for one
+// kind of OpenCL object by supplying the clGet<type>Info name fragment and the
+// CL_<bigType>_REFERENCE_COUNT token fragment.
+
+// Contexts: clGetContextInfo / CL_CONTEXT_REFERENCE_COUNT
+#define MARK_REF_COUNT_CONTEXT( c ) MARK_REF_COUNT_BASE( c, Context, CONTEXT )
+#define TEST_REF_COUNT_CONTEXT( c ) TEST_REF_COUNT_BASE( c, Context, CONTEXT )
+
+// Devices: clGetDeviceInfo / CL_DEVICE_REFERENCE_COUNT
+#define MARK_REF_COUNT_DEVICE( c ) MARK_REF_COUNT_BASE( c, Device, DEVICE )
+#define TEST_REF_COUNT_DEVICE( c ) TEST_REF_COUNT_BASE( c, Device, DEVICE )
+
+// Command queues: clGetCommandQueueInfo / CL_QUEUE_REFERENCE_COUNT
+#define MARK_REF_COUNT_QUEUE( c ) MARK_REF_COUNT_BASE( c, CommandQueue, QUEUE )
+#define TEST_REF_COUNT_QUEUE( c ) TEST_REF_COUNT_BASE( c, CommandQueue, QUEUE )
+
+// Programs: clGetProgramInfo / CL_PROGRAM_REFERENCE_COUNT
+#define MARK_REF_COUNT_PROGRAM( c ) MARK_REF_COUNT_BASE( c, Program, PROGRAM )
+#define TEST_REF_COUNT_PROGRAM( c ) TEST_REF_COUNT_BASE( c, Program, PROGRAM )
+
+// Memory objects: clGetMemObjectInfo / CL_MEM_REFERENCE_COUNT
+#define MARK_REF_COUNT_MEM( c ) MARK_REF_COUNT_BASE( c, MemObject, MEM )
+#define TEST_REF_COUNT_MEM( c ) TEST_REF_COUNT_BASE( c, MemObject, MEM )
+
+#endif // _ref_counting_h
--- a/test_conformance/compatibility/test_common/harness/rounding_mode.c
+++ b/test_conformance/compatibility/test_common/harness/rounding_mode.c
@@ -0,0 +1,175 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "rounding_mode.h"
+
+#if !(defined(_WIN32) && defined(_MSC_VER))
+// Installs the host rounding mode that corresponds to `r` for a conversion producing
+// `outType`, and returns the rounding mode that was active beforehand.
+// kfloat/kdouble outputs use the floating-point table, every other output type the
+// integer table; the two tables differ only in how kDefaultRoundingMode is mapped
+// (to nearest vs. toward zero).  Aborts if the previous mode is not one of the four
+// fenv modes.
+RoundingMode set_round( RoundingMode r, Type outType )
+{
+    static const int kFloatTable[ kRoundingModeCount ] = { FE_TONEAREST, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO };
+    static const int kIntegerTable[ kRoundingModeCount ] = { FE_TOWARDZERO, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO };
+
+    const int producesFloat = ( outType == kfloat || outType == kdouble );
+    const int *table = producesFloat ? kFloatTable : kIntegerTable;
+
+    // Read the current mode first so it can be reported back to the caller.
+    const int previous = fegetround();
+    fesetround( table[ r ] );
+
+    if( previous == FE_TONEAREST )
+        return kRoundToNearestEven;
+    if( previous == FE_UPWARD )
+        return kRoundUp;
+    if( previous == FE_DOWNWARD )
+        return kRoundDown;
+    if( previous == FE_TOWARDZERO )
+        return kRoundTowardZero;
+
+    abort();    // the host reported a rounding mode we do not know about
+    return kDefaultRoundingMode;    // not reached
+}
+
+// Reports the host's current rounding mode as a RoundingMode value.
+// Returns kDefaultRoundingMode when fegetround() yields a value other than the four
+// standard fenv modes.
+RoundingMode get_round( void )
+{
+    const int current = fegetround();
+
+    if( current == FE_TONEAREST )
+        return kRoundToNearestEven;
+    if( current == FE_UPWARD )
+        return kRoundUp;
+    if( current == FE_DOWNWARD )
+        return kRoundDown;
+    if( current == FE_TOWARDZERO )
+        return kRoundTowardZero;
+
+    return kDefaultRoundingMode;
+}
+
+#else
+// MSVC variant: installs the rounding-control bits that correspond to `r` for a
+// conversion producing `outType`, and returns the rounding mode that was active
+// beforehand.  If the current control word cannot be read, an error is logged,
+// nothing is changed and kDefaultRoundingMode is returned.
+RoundingMode set_round( RoundingMode r, Type outType )
+{
+    static const int kFloatTable[ kRoundingModeCount ] = { _RC_NEAR, _RC_NEAR, _RC_UP, _RC_DOWN, _RC_CHOP };
+    static const int kIntegerTable[ kRoundingModeCount ] = { _RC_CHOP, _RC_NEAR, _RC_UP, _RC_DOWN, _RC_CHOP };
+    const int *table = kIntegerTable;
+    unsigned int controlWord;
+    RoundingMode previous;
+
+    if( outType == kfloat || outType == kdouble )
+        table = kFloatTable;
+
+    // A zero mask makes _controlfp_s report the control word without modifying it.
+    if( _controlfp_s( &controlWord, 0, 0 ) != 0 )
+    {
+        vlog_error("\t\tERROR: -- cannot get rounding mode in %s:%d\n", __FILE__, __LINE__);
+        return kDefaultRoundingMode;
+    }
+
+    // Translate the rounding-control field before it is overwritten below.
+    switch( controlWord & _MCW_RC )
+    {
+        case _RC_NEAR:  previous = kRoundToNearestEven; break;
+        case _RC_UP:    previous = kRoundUp;            break;
+        case _RC_DOWN:  previous = kRoundDown;          break;
+        case _RC_CHOP:  previous = kRoundTowardZero;    break;
+        default:        previous = kDefaultRoundingMode; break;
+    }
+
+    // Install the requested mode; only the rounding-control bits are touched.
+    _controlfp_s( &controlWord, table[ r ], _MCW_RC );
+    return previous;
+}
+
+// MSVC variant: reports the host's current rounding mode as a RoundingMode value.
+// Returns kDefaultRoundingMode when the control word cannot be read (an error is logged,
+// matching set_round) or when the rounding-control field holds an unexpected value.
+RoundingMode get_round( void )
+{
+    unsigned int oldRound = 0;
+
+    int err = _controlfp_s(&oldRound, 0, 0); //get rounding mode into oldRound
+    if (err) {
+        // Do not interpret oldRound: it was not filled in by the failed call.
+        vlog_error("\t\tERROR: -- cannot get rounding mode in %s:%d\n", __FILE__, __LINE__);
+        return kDefaultRoundingMode;
+    }
+
+    oldRound &= _MCW_RC;
+    return
+        (oldRound == _RC_NEAR)? kRoundToNearestEven :
+        (oldRound == _RC_UP)?   kRoundUp :
+        (oldRound == _RC_DOWN)? kRoundDown :
+        (oldRound == _RC_CHOP)? kRoundTowardZero:
+        kDefaultRoundingMode;
+}
+
+#endif
+
+//
+// FlushToZero() sets the host processor into ftz mode.  It is intended to have a remote effect on the behavior of the code in
+// basic_test_conversions.c. Some host processors may not support this mode, in which case you'll need to do some clamping in
+// software by testing against FLT_MIN or DBL_MIN in that file.
+//
+// Note: IEEE-754 says conversions are basic operations.  As such they do *NOT* have the behavior in section 7.5.3 of
+// the OpenCL spec. They *ALWAYS* flush to zero for subnormal inputs or outputs when FTZ mode is on like other basic
+// operators do (e.g. add, subtract, multiply, divide, etc.)
+//
+// Configuring hardware to FTZ mode varies by platform.
+// CAUTION: Some C implementations may also fail to behave properly in this mode.
+//
+//  On PowerPC, it is done by setting the FPSCR into non-IEEE mode.
+//  On Intel, you can do this by turning on the FZ and DAZ bits in the MXCSR -- provided that SSE/SSE2
+//          is used for floating point computation! If your OS uses x87, you'll need to figure out how
+//          to turn that off for the conversions code in basic_test_conversions.c so that they flush to
+//          zero properly.  Otherwise, you'll need to add appropriate software clamping to basic_test_conversions.c
+//          in which case, these functions are at liberty to do nothing.
+//
+#if defined( __i386__ ) || defined( __x86_64__ ) || defined (_WIN32)
+    #include <xmmintrin.h>
+#elif defined( __PPC__ )
+    #include <fpu_control.h>
+#endif
+// Puts the host floating-point unit into flush-to-zero mode and returns an opaque token
+// that UnFlushToZero() takes as its argument.
+//   x86 (SSE):   returns the previous MXCSR value packed into a pointer-sized union.
+//   ARM/AArch64: changes nothing and returns NULL.
+//   PowerPC:     sets _FPU_MASK_NI in the FPU control word and returns NULL.
+// Compilation fails with #error on any other operating system or architecture.
+void *FlushToZero( void )
+{
+#if defined( __APPLE__ ) || defined(__linux__) || defined (_WIN32)
+    #if defined( __i386__ ) || defined( __x86_64__ ) || defined(_MSC_VER)
+        // The old MXCSR is stored in the int member and handed back through the pointer member.
+        // NOTE(review): where void* is wider than int, only the int-sized part of u.p is
+        // explicitly initialized; UnFlushToZero reads back just that part -- confirm this is intended.
+        union{ int i;  void *p; }u = { _mm_getcsr() };
+        // 0x8040: presumably the FZ and DAZ bits mentioned in the note above this function -- confirm.
+        _mm_setcsr( u.i | 0x8040 );
+        return u.p;
+    #elif defined( __arm__ ) || defined(__aarch64__)
+        // processor is already in FTZ mode -- do nothing
+        return NULL;
+    #elif defined( __PPC__ )
+        // Read-modify-write of the FPU control word: turn on the non-IEEE (NI) bit.
+        fpu_control_t flags = 0;
+        _FPU_GETCW(flags);
+        flags |= _FPU_MASK_NI;
+        _FPU_SETCW(flags);
+        return NULL;
+        #else
+        #error Unknown arch
+    #endif
+#else
+    #error  Please configure FlushToZero and UnFlushToZero to behave properly on this operating system.
+#endif
+}
+
+// Undo the effects of FlushToZero above, restoring the host to default behavior, using the information passed in p.
+//   x86 (SSE):   writes the int stored in `p` back into MXCSR.
+//   ARM/AArch64: does nothing; `p` is ignored.
+//   PowerPC:     clears _FPU_MASK_NI in the FPU control word; `p` is ignored.
+// Compilation fails with #error on any other operating system or architecture.
+void UnFlushToZero( void *p)
+{
+#if defined( __APPLE__ ) || defined(__linux__) || defined (_WIN32)
+    #if defined( __i386__ ) || defined( __x86_64__ ) || defined(_MSC_VER)
+        // Mirror of the union used in FlushToZero: recover the saved MXCSR from the pointer.
+        union{ void *p; int i;  }u = { p };
+        _mm_setcsr( u.i );
+    #elif defined( __arm__ ) || defined(__aarch64__)
+        // processor is already in FTZ mode -- do nothing
+    #elif defined( __PPC__)
+        // Read-modify-write of the FPU control word: turn the non-IEEE (NI) bit back off.
+        fpu_control_t flags = 0;
+        _FPU_GETCW(flags);
+        flags &= ~_FPU_MASK_NI;
+        _FPU_SETCW(flags);
+        #else
+        #error Unknown arch
+    #endif
+#else
+    #error  Please configure FlushToZero and UnFlushToZero to behave properly on this operating system.
+#endif
+}
--- a/test_conformance/compatibility/test_common/harness/rounding_mode.h
+++ b/test_conformance/compatibility/test_common/harness/rounding_mode.h
@@ -0,0 +1,71 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef __ROUNDING_MODE_H__
+#define __ROUNDING_MODE_H__
+
+#include "compat.h"
+
+#include <stdlib.h>
+
+#if (defined(_WIN32) && defined (_MSC_VER))
+#include "errorHelpers.h"
+#include "testHarness.h"
+#endif
+
+// Rounding modes understood by set_round()/get_round().  The values are used directly as
+// indices into the lookup tables in rounding_mode.c, so their order must not change.
+typedef enum
+{
+    // set_round() maps this to round-to-nearest for kfloat/kdouble outputs and to
+    // round-toward-zero for every other output type; get_round() also returns it when
+    // the host mode is not recognized.
+    kDefaultRoundingMode = 0,
+    kRoundToNearestEven,
+    kRoundUp,
+    kRoundDown,
+    kRoundTowardZero,
+
+    // Number of rounding modes; used to size the lookup tables.  Keep last.
+    kRoundingModeCount
+}RoundingMode;
+
+// Identifiers for the scalar types handled by the harness.  set_round() only
+// distinguishes kfloat and kdouble (floating-point outputs) from all other values.
+typedef enum
+{
+    kuchar = 0,
+    kchar = 1,
+    kushort = 2,
+    kshort = 3,
+    kuint = 4,
+    kint = 5,
+    kfloat = 6,
+    kdouble = 7,
+    kulong = 8,
+    klong = 9,
+
+    //This goes last
+    kTypeCount
+}Type;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Sets the host rounding mode for mode `r` and output type `outType`; returns the mode
+// that was in effect before the call.
+extern RoundingMode set_round( RoundingMode r, Type outType );
+// Returns the host's current rounding mode.
+extern RoundingMode get_round( void );
+// Switches the host into flush-to-zero mode; the returned value is the argument
+// UnFlushToZero() expects.
+extern void *FlushToZero( void );
+// Undoes FlushToZero(); `p` is the value FlushToZero() returned.
+extern void UnFlushToZero( void *p);
+
+#ifdef __cplusplus
+}
+#endif
+
+
+
+#endif /* __ROUNDING_MODE_H__ */
--- a/test_conformance/compatibility/test_common/harness/testHarness.c
+++ b/test_conformance/compatibility/test_common/harness/testHarness.c
@@ -0,0 +1,842 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "testHarness.h"
+#include "compat.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+#if !defined(_WIN32)
+#include <stdbool.h>
+#endif
+
+#include <string.h>
+#include "threadTesting.h"
+#include "errorHelpers.h"
+#include "kernelHelpers.h"
+#include "fpcontrol.h"
+
+#if !defined(_WIN32)
+#include <unistd.h>
+#endif
+
+#include <time.h>
+
+#if !defined (__APPLE__)
+#include <CL/cl.h>
+#endif
+
+// Pass/fail counters; parseAndCallCommandLineTests() reads them to print the summary.
+int gTestsPassed = 0;
+int gTestsFailed = 0;
+// Seed for the random number generators: 0 unless the last command line argument is
+// "randomize", in which case it is taken from clock() and gReSeed is set to 1.
+cl_uint gRandomSeed = 0;
+cl_uint gReSeed = 0;
+
+// Device capability flags filled in by runTestHarnessWithCheck():
+int     gFlushDenormsToZero = 0;    // 1 when CL_DEVICE_SINGLE_FP_CONFIG lacks CL_FP_DENORM
+int     gInfNanSupport = 1;         // cleared for embedded devices lacking CL_FP_INF_NAN
+int     gIsEmbedded = 0;            // 1 when CL_DEVICE_PROFILE contains "EMBEDDED_PROFILE"
+int     gIsOpenCL_C_1_0_Device = 0; // 1 when OPENCL_1_0_DEVICE is set and the C version starts with "OpenCL C 1.0 "
+int     gIsOpenCL_1_0_Device = 0;   // 1 when the OPENCL_1_0_DEVICE environment variable is set
+int     gHasLong = 1;               // cleared for embedded devices without "cles_khr_int64"
+
+// num_elements handed to the tests when the command line gives none (or a value <= 0).
+#define DEFAULT_NUM_ELEMENTS        0x4000
+
+// Convenience front end for runTestHarnessWithCheck(): when imageSupportRequired is
+// non-zero the device is vetted with verifyImageSupport, otherwise no device check
+// function is installed.  All other arguments are forwarded unchanged.
+int runTestHarness( int argc, const char *argv[], unsigned int num_fns,
+                   basefn fnList[], const char *fnNames[],
+                   int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps )
+{
+    DeviceCheckFn checkFn = NULL;
+
+    if( imageSupportRequired )
+        checkFn = verifyImageSupport;
+
+    return runTestHarnessWithCheck( argc, argv, num_fns, fnList, fnNames,
+                                    imageSupportRequired, forceNoContextCreation, queueProps,
+                                    checkFn );
+}
+
+// Shared driver for the compatibility test executables.
+//
+// Device selection comes from the environment (CL_DEVICE_TYPE, CL_DEVICE_INDEX,
+// CL_PLATFORM_INDEX) and can be overridden by trailing command line arguments.  Those are
+// consumed from the END of argv in this order: "randomize", <num_elements>, <device type>,
+// id<num>, pid<num>; whatever remains is treated as test names / wildcards and handed to
+// parseAndCallCommandLineTests().
+//
+//   argc, argv              - command line of the test executable
+//   num_fns                 - number of entries in fnList / fnNames
+//   fnList, fnNames         - test functions and their names (parallel arrays)
+//   imageSupportRequired    - not used by this function itself
+//   forceNoContextCreation  - forwarded to parseAndCallCommandLineTests()
+//   queueProps              - forwarded to parseAndCallCommandLineTests()
+//   deviceCheckFn           - optional; when non-NULL it must return CL_SUCCESS for the
+//                             selected device or the run is abandoned
+//
+// Returns 0 after printing the usage text ("-list", "-h", "--help"), -1 when device
+// selection or a device query fails, -2 for an unknown CL_MAX_SSE value (Apple x86 only),
+// and otherwise the value returned by parseAndCallCommandLineTests().  An unknown
+// CL_DEVICE_TYPE environment value aborts the process.
+int runTestHarnessWithCheck( int argc, const char *argv[], unsigned int num_fns,
+                 basefn fnList[], const char *fnNames[],
+                int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps,
+                DeviceCheckFn deviceCheckFn )
+{
+    test_start();
+    log_info("*** Compatibility with Previous Versions test ***\n");
+
+    cl_device_type    device_type = CL_DEVICE_TYPE_DEFAULT;
+    cl_uint            num_platforms = 0;
+    cl_platform_id     *platforms;
+    cl_device_id       device;
+    int                num_elements = DEFAULT_NUM_ELEMENTS;
+    cl_uint            num_devices = 0;
+    cl_device_id       *devices = NULL;
+    cl_uint            choosen_device_index = 0;
+    cl_uint            choosen_platform_index = 0;
+
+    int            err, ret;
+    char *endPtr;
+    unsigned int            i;
+    int based_on_env_var = 0;
+
+
+    /* Check for environment variable to set device type */
+    char *env_mode = getenv( "CL_DEVICE_TYPE" );
+    if( env_mode != NULL )
+    {
+        based_on_env_var = 1;
+        if( strcmp( env_mode, "gpu" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_GPU" ) == 0 )
+            device_type = CL_DEVICE_TYPE_GPU;
+        else if( strcmp( env_mode, "cpu" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_CPU" ) == 0 )
+            device_type = CL_DEVICE_TYPE_CPU;
+        else if( strcmp( env_mode, "accelerator" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 )
+            device_type = CL_DEVICE_TYPE_ACCELERATOR;
+        else if( strcmp( env_mode, "default" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_DEFAULT" ) == 0 )
+            device_type = CL_DEVICE_TYPE_DEFAULT;
+        else
+        {
+            log_error( "Unknown CL_DEVICE_TYPE env variable setting: %s.\nAborting...\n", env_mode );
+            abort();
+        }
+    }
+
+#if defined( __APPLE__ )
+    {
+        // report on any unusual library search path indirection
+        char *libSearchPath = getenv( "DYLD_LIBRARY_PATH");
+        if( libSearchPath )
+            log_info( "*** DYLD_LIBRARY_PATH = \"%s\"\n", libSearchPath );
+
+        // report on any unusual framework search path indirection
+        // (guarded by its own variable so a NULL is never passed to %s)
+        char *frameworkSearchPath = getenv( "DYLD_FRAMEWORK_PATH");
+        if( frameworkSearchPath )
+            log_info( "*** DYLD_FRAMEWORK_PATH = \"%s\"\n", frameworkSearchPath );
+    }
+#endif
+
+    env_mode = getenv( "CL_DEVICE_INDEX" );
+    if( env_mode != NULL )
+    {
+        choosen_device_index = atoi(env_mode);
+    }
+
+    env_mode = getenv( "CL_PLATFORM_INDEX" );
+    if( env_mode != NULL )
+    {
+        choosen_platform_index = atoi(env_mode);
+    }
+
+    /* Process the command line arguments */
+
+    /* Special case: just list the tests */
+    if( ( argc > 1 ) && (!strcmp( argv[ 1 ], "-list" ) || !strcmp( argv[ 1 ], "-h" ) || !strcmp( argv[ 1 ], "--help" )))
+    {
+        log_info( "Usage: %s [<function name>*] [pid<num>] [id<num>] [<device type>]\n", argv[0] );
+        log_info( "\t<function name>\tOne or more of: (wildcard character '*') (default *)\n");
+        log_info( "\tpid<num>\t\tIndicates platform at index <num> should be used (default 0).\n" );
+        log_info( "\tid<num>\t\tIndicates device at index <num> should be used (default 0).\n" );
+        log_info( "\t<device_type>\tcpu|gpu|accelerator|<CL_DEVICE_TYPE_*> (default CL_DEVICE_TYPE_DEFAULT)\n" );
+
+        /* The last entry of fnNames is not listed.  Written as i + 1 < num_fns so that an
+           empty list does not underflow the unsigned bound. */
+        for( i = 0; i + 1 < num_fns; i++ )
+        {
+            log_info( "\t\t%s\n", fnNames[ i ] );
+        }
+        test_finish();
+        return 0;
+    }
+
+    /* How are we supposed to seed the random # generators? */
+    if( argc > 1 && strcmp( argv[ argc - 1 ], "randomize" ) == 0 )
+    {
+        log_info(" Initializing random seed based on the clock.\n");
+        gRandomSeed = (unsigned)clock();
+        gReSeed = 1;
+        argc--;
+    }
+    else
+    {
+        log_info(" Initializing random seed to 0.\n");
+    }
+
+    /* Do we have an integer to specify the number of elements to pass to tests? */
+    if( argc > 1 )
+    {
+        ret = (int)strtol( argv[ argc - 1 ], &endPtr, 10 );
+        if( endPtr != argv[ argc - 1 ] && *endPtr == 0 )
+        {
+            /* By spec, this means the entire string was a valid integer, so we treat it as a num_elements spec */
+            /* (hence why we stored the result in ret first) */
+            num_elements = ret;
+            log_info( "Testing with num_elements of %d\n", num_elements );
+            argc--;
+        }
+    }
+
+    /* Do we have a CPU/GPU specification? */
+    /* A device type given here overrides the environment, so the "based on" message below
+       must then name the command line. */
+    if( argc > 1 )
+    {
+        if( strcmp( argv[ argc - 1 ], "gpu" ) == 0 || strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_GPU" ) == 0 )
+        {
+            device_type = CL_DEVICE_TYPE_GPU;
+            based_on_env_var = 0;
+            argc--;
+        }
+        else if( strcmp( argv[ argc - 1 ], "cpu" ) == 0 || strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_CPU" ) == 0 )
+        {
+            device_type = CL_DEVICE_TYPE_CPU;
+            based_on_env_var = 0;
+            argc--;
+        }
+        else if( strcmp( argv[ argc - 1 ], "accelerator" ) == 0 || strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 )
+        {
+            device_type = CL_DEVICE_TYPE_ACCELERATOR;
+            based_on_env_var = 0;
+            argc--;
+        }
+        else if( strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_DEFAULT" ) == 0 )
+        {
+            device_type = CL_DEVICE_TYPE_DEFAULT;
+            based_on_env_var = 0;
+            argc--;
+        }
+    }
+
+    /* Did we choose a specific device index? */
+    if( argc > 1 )
+    {
+        if( strlen( argv[ argc - 1 ] ) >= 3 && argv[ argc - 1 ][0] == 'i' && argv[ argc - 1 ][1] == 'd' )
+        {
+            choosen_device_index = atoi( &(argv[ argc - 1 ][2]) );
+            argc--;
+        }
+    }
+
+    /* Did we choose a specific platform index? */
+    if( argc > 1 )
+    {
+        if( strlen( argv[ argc - 1 ] ) >= 3 && argv[ argc - 1 ][0] == 'p' && argv[ argc - 1 ][1] == 'i' && argv[ argc - 1 ][2] == 'd')
+        {
+            choosen_platform_index = atoi( &(argv[ argc - 1 ][3]) );
+            argc--;
+        }
+    }
+
+    switch( device_type )
+    {
+        case CL_DEVICE_TYPE_GPU:            log_info( "Requesting GPU device " ); break;
+        case CL_DEVICE_TYPE_CPU:            log_info( "Requesting CPU device " ); break;
+        case CL_DEVICE_TYPE_ACCELERATOR:    log_info( "Requesting Accelerator device " ); break;
+        case CL_DEVICE_TYPE_DEFAULT:        log_info( "Requesting Default device " ); break;
+        default:                            log_error( "Requesting unknown device "); return -1;
+    }
+    log_info( based_on_env_var ? "based on environment variable " : "based on command line " );
+    log_info( "for platform index %d and device index %d\n", choosen_platform_index, choosen_device_index);
+
+#if defined( __APPLE__ )
+#if defined( __i386__ ) || defined( __x86_64__ )
+#define    kHasSSE3                0x00000008
+#define kHasSupplementalSSE3    0x00000100
+#define    kHasSSE4_1              0x00000400
+#define    kHasSSE4_2              0x00000800
+    /* check our environment for a hint to disable SSE variants */
+    {
+        const char *env = getenv( "CL_MAX_SSE" );
+        if( env )
+        {
+            extern int _cpu_capabilities;
+            int mask = 0;
+            if( 0 == strcasecmp( env, "SSE4.1" ) )
+                mask = kHasSSE4_2;
+            else if( 0 == strcasecmp( env, "SSSE3" ) )
+                mask = kHasSSE4_2 | kHasSSE4_1;
+            else if( 0 == strcasecmp( env, "SSE3" ) )
+                mask = kHasSSE4_2 | kHasSSE4_1 | kHasSupplementalSSE3;
+            else if( 0 == strcasecmp( env, "SSE2" ) )
+                mask = kHasSSE4_2 | kHasSSE4_1 | kHasSupplementalSSE3 | kHasSSE3;
+            else
+            {
+                log_error( "Error: Unknown CL_MAX_SSE setting: %s\n", env );
+                return -2;
+            }
+
+            log_info( "*** Environment: CL_MAX_SSE = %s ***\n", env );
+            _cpu_capabilities &= ~mask;
+        }
+    }
+#endif
+#endif
+
+    /* Get the platform */
+    err = clGetPlatformIDs(0, NULL, &num_platforms);
+    if (err) {
+        print_error(err, "clGetPlatformIDs failed");
+        test_finish();
+        return -1;
+    }
+
+    platforms = (cl_platform_id *) malloc( num_platforms * sizeof( cl_platform_id ) );
+    if (!platforms || choosen_platform_index >= num_platforms) {
+        log_error( "platform index out of range -- choosen_platform_index (%d) >= num_platforms (%d)\n", choosen_platform_index, num_platforms );
+        free(platforms);    /* no-op when the allocation itself failed */
+        test_finish();
+        return -1;
+    }
+
+    err = clGetPlatformIDs(num_platforms, platforms, NULL);
+    if (err) {
+        print_error(err, "clGetPlatformIDs failed");
+        free(platforms);
+        test_finish();
+        return -1;
+    }
+
+    /* Get the number of requested devices */
+    err = clGetDeviceIDs(platforms[choosen_platform_index],  device_type, 0, NULL, &num_devices );
+    if (err) {
+        print_error(err, "clGetDeviceIDs failed");
+        free(platforms);
+        test_finish();
+        return -1;
+    }
+
+    devices = (cl_device_id *) malloc( num_devices * sizeof( cl_device_id ) );
+    if (!devices || choosen_device_index >= num_devices) {
+        log_error( "device index out of range -- choosen_device_index (%d) >= num_devices (%d)\n", choosen_device_index, num_devices );
+        free(devices);      /* no-op when the allocation itself failed */
+        free(platforms);
+        test_finish();
+        return -1;
+    }
+
+    /* Get the requested device */
+    err = clGetDeviceIDs(platforms[choosen_platform_index],  device_type, num_devices, devices, NULL );
+    if (err) {
+        print_error(err, "clGetDeviceIDs failed");
+        free(devices);
+        free(platforms);
+        test_finish();
+        return -1;
+    }
+
+    /* Only the selected device id is needed from here on. */
+    device = devices[choosen_device_index];
+    free(devices);
+    devices = NULL;
+    free(platforms);
+    platforms = NULL;
+
+    if( printDeviceHeader( device ) != CL_SUCCESS )
+    {
+        test_finish();
+        return -1;
+    }
+
+    cl_device_fp_config fpconfig = 0;
+    err = clGetDeviceInfo( device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( fpconfig ), &fpconfig, NULL );
+    if (err) {
+        print_error(err, "clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed");
+        test_finish();
+        return -1;
+    }
+
+    gFlushDenormsToZero = ( 0 == (fpconfig & CL_FP_DENORM));
+    log_info( "Supports single precision denormals: %s\n", gFlushDenormsToZero ? "NO" : "YES" );
+    log_info( "sizeof( void*) = %d  (host)\n", (int) sizeof( void* ) );
+
+    //detect whether profile of the device is embedded
+    char profile[1024] = "";
+    err = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL);
+    if (err)
+    {
+        print_error(err, "clGetDeviceInfo for CL_DEVICE_PROFILE failed\n" );
+        test_finish();
+        return -1;
+    }
+    gIsEmbedded = NULL != strstr(profile, "EMBEDDED_PROFILE");
+
+    //detect the floating point capabilities
+    cl_device_fp_config floatCapabilities = 0;
+    err = clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof(floatCapabilities), &floatCapabilities, NULL);
+    if (err)
+    {
+        print_error(err, "clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed\n");
+        test_finish();
+        return -1;
+    }
+
+    // Check for problems that only embedded will have
+    if( gIsEmbedded )
+    {
+        //If the device is embedded, we need to detect if the device supports Infinity and NaN
+        if ((floatCapabilities & CL_FP_INF_NAN) == 0)
+            gInfNanSupport = 0;
+
+        // check the extensions list to see if ulong and long are supported
+        size_t extensionsStringSize = 0;
+        if( (err = clGetDeviceInfo( device, CL_DEVICE_EXTENSIONS, 0, NULL, &extensionsStringSize ) ))
+        {
+            print_error( err, "Unable to get extensions string size for embedded device" );
+            test_finish();
+            return -1;
+        }
+        char *extensions_string = (char*) malloc(extensionsStringSize);
+        if( NULL == extensions_string )
+        {
+            print_error( CL_OUT_OF_HOST_MEMORY, "Unable to allocate storage for extensions string for embedded device" );
+            test_finish();
+            return -1;
+        }
+
+        if( (err = clGetDeviceInfo( device, CL_DEVICE_EXTENSIONS, extensionsStringSize, extensions_string, NULL ) ))
+        {
+            print_error( err, "Unable to get extensions string for embedded device" );
+            free(extensions_string);
+            test_finish();
+            return -1;
+        }
+
+        /* A reported size of 0 cannot hold a terminator; treat it like a missing NUL
+           rather than indexing before the start of the buffer. */
+        if( extensionsStringSize == 0 || extensions_string[extensionsStringSize-1] != '\0' )
+        {
+            log_error( "FAILURE: extensions string for embedded device is not NUL terminated" );
+            free(extensions_string);
+            test_finish();
+            return -1;
+        }
+
+        if( NULL == strstr( extensions_string, "cles_khr_int64" ))
+            gHasLong = 0;
+
+        free(extensions_string);
+    }
+
+    if( getenv( "OPENCL_1_0_DEVICE" ) )
+    {
+        char c_version[1024];
+        gIsOpenCL_1_0_Device = 1;
+        memset( c_version, 0, sizeof( c_version ) );
+
+        if( (err = clGetDeviceInfo( device, CL_DEVICE_OPENCL_C_VERSION, sizeof(c_version), c_version, NULL )) )
+        {
+            log_error( "FAILURE: unable to get CL_DEVICE_OPENCL_C_VERSION on 1.0 device. (%d)\n", err );
+            test_finish();
+            return -1;
+        }
+
+        if( 0 == strncmp( c_version, "OpenCL C 1.0 ", strlen( "OpenCL C 1.0 " ) ) )
+        {
+            gIsOpenCL_C_1_0_Device = 1;
+            log_info( "Device is a OpenCL C 1.0 device\n" );
+        }
+        else
+            log_info( "Device is a OpenCL 1.0 device, but supports OpenCL C 1.1\n" );
+    }
+
+    cl_uint device_address_bits = 0;
+    if( (err = clGetDeviceInfo( device, CL_DEVICE_ADDRESS_BITS, sizeof( device_address_bits ), &device_address_bits, NULL ) ))
+    {
+        print_error( err, "Unable to obtain device address bits" );
+        test_finish();
+        return -1;
+    }
+    if( device_address_bits )
+        log_info( "sizeof( void*) = %d  (device)\n", device_address_bits/8 );
+    else
+    {
+        log_error("Invalid device address bit size returned by device.\n");
+        test_finish();
+        return -1;
+    }
+
+
+    /* If we have a device checking function, run it */
+    if( ( deviceCheckFn != NULL ) && deviceCheckFn( device ) != CL_SUCCESS )
+    {
+        test_finish();
+        return -1;
+    }
+
+    if (num_elements <= 0)
+        num_elements = DEFAULT_NUM_ELEMENTS;
+
+        // On most platforms which support denorm, default is FTZ off. However,
+        // on some hardware where the reference is computed, default might be flush denorms to zero e.g. arm.
+        // This creates issues in result verification. Since spec allows the implementation to either flush or
+        // not flush denorms to zero, an implementation may choose not be flush i.e. return denorm result whereas
+        // reference result may be zero (flushed denorm). Hence we need to disable denorm flushing on host side
+        // where reference is being computed to make sure we get non-flushed reference result. If implementation
+        // returns flushed result, we correctly take care of that in verification code.
+#if defined(__APPLE__) && defined(__arm__)
+        FPU_mode_type oldMode;
+        DisableFTZ( &oldMode );
+#endif
+
+    int error = parseAndCallCommandLineTests( argc, argv, device, num_fns, fnList, fnNames, forceNoContextCreation, queueProps, num_elements );
+
+ #if defined(__APPLE__) && defined(__arm__)
+     // Restore the old FP mode before leaving.
+    RestoreFPState( &oldMode );
+#endif
+
+    return error;
+}
+
+// Selects every test whose name matches a wildcard pattern containing an asterisk.
+//
+// The pattern is split at its FIRST asterisk: a name matches when it starts with the text
+// before the asterisk and ends with the text after it.  The common "prefix*" form therefore
+// behaves as a plain prefix match, and "*suffix" / "pre*suf" no longer compare a literal '*'
+// against the test names.  A pattern without any asterisk is treated as a prefix.
+//
+//   fnNames   - test names, num_fns entries
+//   fnsToCall - selection table, num_fns entries; matching entries are set to 1
+//   wildcard  - the pattern
+//
+// Returns EXIT_SUCCESS when at least one test was selected.  Returns EXIT_FAILURE (after
+// logging) when nothing matched or when a matching test had already been selected.
+static int find_wildcard_matching_functions( const char *fnNames[], unsigned char fnsToCall[], unsigned int num_fns,
+                                             const char *wildcard )
+{
+    int found_tests = 0;
+
+    const char *asterisk = strchr( wildcard, '*' );
+    size_t prefix_length = ( asterisk != NULL ) ? (size_t)( asterisk - wildcard ) : strlen( wildcard );
+    const char *suffix = ( asterisk != NULL ) ? asterisk + 1 : "";
+    size_t suffix_length = strlen( suffix );
+
+    for( unsigned int fnIndex = 0; fnIndex < num_fns; fnIndex++ )
+    {
+        const char *name = fnNames[ fnIndex ];
+        size_t name_length = strlen( name );
+
+        /* The name must be long enough to hold prefix and suffix without overlap. */
+        if( name_length < prefix_length + suffix_length )
+            continue;
+        if( strncmp( name, wildcard, prefix_length ) != 0 )
+            continue;
+        if( strcmp( name + ( name_length - suffix_length ), suffix ) != 0 )
+            continue;
+
+        if( fnsToCall[ fnIndex ] )
+        {
+            log_error( "ERROR: Test '%s' has already been selected.\n", fnNames[ fnIndex ] );
+            return EXIT_FAILURE;
+        }
+
+        fnsToCall[ fnIndex ] = 1;
+        found_tests = 1;
+    }
+
+    if( !found_tests )
+    {
+        log_error( "ERROR: The wildcard '%s' did not match any test names.\n", wildcard );
+        return EXIT_FAILURE;
+    }
+
+    return EXIT_SUCCESS;
+}
+
+// Selects the single test whose name equals `argument` exactly.
+// Returns EXIT_SUCCESS after marking it in fnsToCall; returns EXIT_FAILURE (after logging)
+// when that test was already selected or when no test has this name.
+static int find_argument_matching_function( const char *fnNames[], unsigned char *fnsToCall, unsigned int num_fns,
+                                            const char *argument )
+{
+    for( unsigned int candidate = 0; candidate < num_fns; candidate++ )
+    {
+        if( strcmp( argument, fnNames[ candidate ] ) != 0 )
+            continue;
+
+        /* First name that matches decides the outcome. */
+        if( fnsToCall[ candidate ] )
+        {
+            log_error( "ERROR: Test '%s' has already been selected.\n", fnNames[ candidate ] );
+            return EXIT_FAILURE;
+        }
+
+        fnsToCall[ candidate ] = 1;
+        return EXIT_SUCCESS;
+    }
+
+    /* Ran off the end of the list without finding the name. */
+    log_error( "ERROR: The argument '%s' did not match any test names.\n", argument );
+    return EXIT_FAILURE;
+}
+
+// Works out which tests the remaining command line arguments select, runs them, and
+// prints a pass/fail summary.
+//
+// With no arguments (argc == 1) every test is selected.  Otherwise each argument is either
+// a wildcard (contains '*'), the word "all" (selects everything and stops parsing), or an
+// exact test name.  Tests are only run when every argument was resolved successfully.
+//
+// Returns EXIT_FAILURE when an argument could not be resolved or the selection table could
+// not be allocated; otherwise the value returned by callTestFunctions().  test_finish() is
+// called on every path.
+int parseAndCallCommandLineTests( int argc, const char *argv[], cl_device_id device, unsigned int num_fns,
+                                  basefn fnList[], const char *fnNames[], int forceNoContextCreation,
+                                  cl_command_queue_properties queueProps, int num_elements )
+{
+    int ret = EXIT_SUCCESS;
+
+    /* One flag per test.  At least one byte is requested so that a NULL result always
+       means an allocation failure, even when num_fns is 0. */
+    unsigned char *fnsToCall = ( unsigned char* ) calloc( num_fns ? num_fns : 1, 1 );
+    if( fnsToCall == NULL )
+    {
+        log_error( "ERROR: Unable to allocate the test selection table (%u entries).\n", num_fns );
+        test_finish();
+        return EXIT_FAILURE;
+    }
+
+    if( argc == 1 )
+    {
+        /* No actual arguments, all tests will be run. */
+        memset( fnsToCall, 1, num_fns );
+    }
+    else
+    {
+        for( int argIndex = 1; argIndex < argc; argIndex++ )
+        {
+            if( strchr( argv[ argIndex ], '*' ) != NULL )
+            {
+                ret = find_wildcard_matching_functions( fnNames, fnsToCall, num_fns, argv[ argIndex ] );
+            }
+            else
+            {
+                if( strcmp( argv[ argIndex ], "all" ) == 0 )
+                {
+                    memset( fnsToCall, 1, num_fns );
+                    break;
+                }
+                else
+                {
+                    ret = find_argument_matching_function( fnNames, fnsToCall, num_fns, argv[ argIndex ] );
+                }
+            }
+
+            if( ret == EXIT_FAILURE )
+            {
+                break;
+            }
+        }
+    }
+
+    if( ret == EXIT_SUCCESS )
+    {
+        ret = callTestFunctions( fnList, fnNames, fnsToCall, num_fns, device, forceNoContextCreation, num_elements, queueProps );
+
+        /* Summary, phrased differently for a single test and for several tests. */
+        if( gTestsFailed == 0 )
+        {
+            if( gTestsPassed > 1 )
+            {
+                log_info("PASSED %d of %d tests.\n", gTestsPassed, gTestsPassed);
+            }
+            else if( gTestsPassed > 0 )
+            {
+                log_info("PASSED test.\n");
+            }
+        }
+        else if( gTestsFailed > 0 )
+        {
+            if( gTestsFailed+gTestsPassed > 1 )
+            {
+                log_error("FAILED %d of %d tests.\n", gTestsFailed, gTestsFailed+gTestsPassed);
+            }
+            else
+            {
+                log_error("FAILED test.\n");
+            }
+        }
+    }
+
+    test_finish();
+
+    free( fnsToCall );
+
+    return ret;
+}
+
+// Walks the parallel function tables and runs every entry whose functionsToCall flag is non-zero.
+// A selected entry with a NULL function pointer is logged as not implemented and skipped.
+// Returns the sum of the values returned by callSingleTestFunction.
+int callTestFunctions( basefn functionList[], const char *functionNames[], unsigned char functionsToCall[],
+                       int numFunctions, cl_device_id deviceToUse, int forceNoContextCreation,
+                       int numElementsToUse, cl_command_queue_properties queueProps )
+{
+    int failureTotal = 0;
+    int idx;
+
+    for( idx = 0; idx < numFunctions; idx++ )
+    {
+        if( !functionsToCall[ idx ] )
+            continue;
+
+        if( functionList[ idx ] == NULL )
+        {
+            /* Unimplemented tests are reported but never counted as errors. */
+            log_info( "%s test currently not implemented\n", functionNames[ idx ] );
+            continue;
+        }
+
+        failureTotal += callSingleTestFunction( functionList[ idx ], functionNames[ idx ], deviceToUse,
+                                                forceNoContextCreation, numElementsToUse, queueProps );
+    }
+
+    return failureTotal;
+}
+
+// Context pfn_notify callback: writes the supplied message to the info log.
+// private_info, cb and user_data are not used.
+void CL_CALLBACK notify_callback(const char *errinfo, const void *private_info, size_t cb, void *user_data)
+{
+    (void) private_info;
+    (void) cb;
+    (void) user_data;
+
+    log_info( "%s\n", errinfo );
+}
+
+// Actual function execution.
+// Runs one test: optionally creates a context and command queue for it, invokes it, logs the
+// outcome, updates gTestsPassed / gTestsFailed, and releases whatever was created here.
+//    functionToCall / functionName  the test entry point and the name used in log output
+//    deviceToUse                    device handed to the test (and used for the context/queue)
+//    forceNoContextCreation         when non-zero the test receives a NULL context and NULL queue
+//    numElementsToUse               forwarded to the test unchanged
+//    queueProps                     properties for the command queue created for the test
+// Returns 1 if the context or queue could not be created; otherwise the number of errors
+// recorded (a failing test and a failing clFinish each count as one). A test returning
+// TEST_NOT_IMPLEMENTED is counted as neither passed nor failed.
+int callSingleTestFunction( basefn functionToCall, const char *functionName,
+                           cl_device_id deviceToUse, int forceNoContextCreation,
+                           int numElementsToUse, cl_command_queue_properties queueProps )
+{
+    int numErrors = 0, ret;
+    cl_int error;
+    cl_context context = NULL;
+    cl_command_queue queue = NULL;
+
+    /* Create a context to work with, unless we're told not to */
+    if( !forceNoContextCreation )
+    {
+        context = clCreateContext(NULL, 1, &deviceToUse, notify_callback, NULL, &error );
+        if (!context)
+        {
+            print_error( error, "Unable to create testing context" );
+            return 1;
+        }
+
+        queue = clCreateCommandQueue( context, deviceToUse, queueProps, &error );
+        if( queue == NULL )
+        {
+            print_error( error, "Unable to create testing command queue" );
+            /* Don't leak the context that was successfully created above */
+            clReleaseContext( context );
+            return 1;
+        }
+    }
+
+    /* Run the test and print the result */
+    log_info( "%s...\n", functionName );
+    fflush( stdout );
+
+    ret = functionToCall( deviceToUse, context, queue, numElementsToUse);
+    if( ret == TEST_NOT_IMPLEMENTED )
+    {
+        /* Tests can also let us know they're not implemented yet */
+        log_info("%s test currently not implemented\n\n", functionName);
+    }
+    else
+    {
+        /* Print result */
+        if( ret == 0 ) {
+            log_info( "%s passed\n", functionName );
+            gTestsPassed++;
+        }
+        else
+        {
+            numErrors++;
+            log_error( "%s FAILED\n", functionName );
+            gTestsFailed++;
+        }
+    }
+
+    /* Release the context */
+    if( !forceNoContextCreation )
+    {
+        /* Drain the queue first so a failure inside still-pending work is reported */
+        error = clFinish(queue);
+        if( error != CL_SUCCESS )
+        {
+            log_error("clFinish failed: %d\n", error);
+            numErrors++;
+        }
+        clReleaseCommandQueue( queue );
+        clReleaseContext( context );
+    }
+
+    return numErrors;
+}
+
+// Applies the CL_DEVICE_TYPE environment override (if any) to *inOutType and logs the choice.
+// Recognised values are "gpu"/"CL_DEVICE_TYPE_GPU", "cpu"/"CL_DEVICE_TYPE_CPU",
+// "accelerator"/"CL_DEVICE_TYPE_ACCELERATOR" and "CL_DEVICE_TYPE_DEFAULT"; any other value
+// leaves *inOutType unchanged. On Apple platforms the DYLD_LIBRARY_PATH and
+// DYLD_FRAMEWORK_PATH variables are also reported when they are set.
+void checkDeviceTypeOverride( cl_device_type *inOutType )
+{
+    /* Check if we are forced to CPU mode */
+    char *force_cpu = getenv( "CL_DEVICE_TYPE" );
+    if( force_cpu != NULL )
+    {
+        if( strcmp( force_cpu, "gpu" ) == 0 || strcmp( force_cpu, "CL_DEVICE_TYPE_GPU" ) == 0 )
+            *inOutType = CL_DEVICE_TYPE_GPU;
+        else if( strcmp( force_cpu, "cpu" ) == 0 || strcmp( force_cpu, "CL_DEVICE_TYPE_CPU" ) == 0 )
+            *inOutType = CL_DEVICE_TYPE_CPU;
+        else if( strcmp( force_cpu, "accelerator" ) == 0 || strcmp( force_cpu, "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 )
+            *inOutType = CL_DEVICE_TYPE_ACCELERATOR;
+        else if( strcmp( force_cpu, "CL_DEVICE_TYPE_DEFAULT" ) == 0 )
+            *inOutType = CL_DEVICE_TYPE_DEFAULT;
+    }
+
+    switch( *inOutType )
+    {
+        case CL_DEVICE_TYPE_GPU:            log_info( "Requesting GPU device " ); break;
+        case CL_DEVICE_TYPE_CPU:            log_info( "Requesting CPU device " ); break;
+        case CL_DEVICE_TYPE_ACCELERATOR:    log_info( "Requesting Accelerator device " ); break;
+        case CL_DEVICE_TYPE_DEFAULT:        log_info( "Requesting Default device " ); break;
+        default: break;
+    }
+    log_info( force_cpu != NULL ? "based on environment variable\n" : "based on command line\n" );
+
+#if defined( __APPLE__ )
+    {
+        // report on any unusual library search path indirection
+        char *libSearchPath = getenv( "DYLD_LIBRARY_PATH");
+        if( libSearchPath )
+            log_info( "*** DYLD_LIBRARY_PATH = \"%s\"\n", libSearchPath );
+
+        // report on any unusual framework search path indirection
+        // (guard on the framework variable itself so a NULL is never passed to %s)
+        char *frameworkSearchPath = getenv( "DYLD_FRAMEWORK_PATH");
+        if( frameworkSearchPath )
+            log_info( "*** DYLD_FRAMEWORK_PATH = \"%s\"\n", frameworkSearchPath );
+    }
+#endif
+
+}
+
+#if ! defined( __APPLE__ )
+// Fills dest with repeated copies of the 4-byte pattern found at src_pattern.
+//    bytes is the total number of bytes to write; when it is not a multiple of 4 the final
+//    (bytes % 4) bytes receive the leading bytes of the pattern.
+// All copies go through memcpy, so neither pointer has to be 4-byte aligned and no object is
+// accessed through an incompatible pointer type.
+void memset_pattern4(void *dest, const void *src_pattern, size_t bytes )
+{
+    unsigned char *out = (unsigned char *) dest;
+    size_t whole = bytes & ~(size_t) 3;   /* largest multiple of 4 that fits in bytes */
+    size_t tail = bytes & 3;
+    size_t offset;
+
+    for( offset = 0; offset < whole; offset += 4 )
+        memcpy( out + offset, src_pattern, 4 );
+
+    if( tail )
+        memcpy( out + whole, src_pattern, tail );
+}
+#endif
+
+// Queries CL_DEVICE_TYPE for the given device and returns it.
+// If the query fails an error is logged and the local value (initialised to -1) is returned
+// as the call left it.
+extern cl_device_type GetDeviceType( cl_device_id d )
+{
+    cl_device_type deviceType = -1;
+
+    if( clGetDeviceInfo( d, CL_DEVICE_TYPE, sizeof( deviceType ), &deviceType, NULL ) != CL_SUCCESS )
+    {
+        log_error( "ERROR: Unable to get device type for device %p\n", d );
+    }
+
+    return deviceType;
+}
+
+
+// Looks for a device on the same platform as the given one that is not the given one.
+// Returns:
+//    - the first other device found whose CL_DEVICE_TYPE can be queried,
+//    - the given device itself when the platform exposes exactly one device,
+//    - NULL when any OpenCL query or the device-list allocation fails.
+cl_device_id GetOpposingDevice( cl_device_id device )
+{
+    cl_int error;
+    cl_device_id *otherDevices;
+    cl_uint actualCount;
+    cl_platform_id plat;
+
+    // Get the platform of the device to use for getting a list of devices
+    error = clGetDeviceInfo( device, CL_DEVICE_PLATFORM, sizeof( plat ), &plat, NULL );
+    if( error != CL_SUCCESS )
+    {
+        print_error( error, "Unable to get device's platform" );
+        return NULL;
+    }
+
+    // Get a list of all devices
+    error = clGetDeviceIDs( plat, CL_DEVICE_TYPE_ALL, 0, NULL, &actualCount );
+    if( error != CL_SUCCESS )
+    {
+        print_error( error, "Unable to get list of devices size" );
+        return NULL;
+    }
+    otherDevices = (cl_device_id *)malloc(actualCount*sizeof(cl_device_id));
+    if( otherDevices == NULL )
+    {
+        // Never hand a NULL buffer to clGetDeviceIDs together with a non-zero count
+        log_error( "ERROR: Unable to allocate memory for the device list\n" );
+        return NULL;
+    }
+    error = clGetDeviceIDs( plat, CL_DEVICE_TYPE_ALL, actualCount, otherDevices, NULL );
+    if( error != CL_SUCCESS )
+    {
+        print_error( error, "Unable to get list of devices" );
+        free(otherDevices);
+        return NULL;
+    }
+
+    if( actualCount == 1 )
+    {
+        free(otherDevices);
+        return device;    // NULL means error, returning self means we couldn't find another one
+    }
+
+    // Loop and just find one that isn't the one we were given
+    cl_uint i;
+    for( i = 0; i < actualCount; i++ )
+    {
+        if( otherDevices[ i ] != device )
+        {
+            // The type value itself is not used; the query only has to succeed
+            cl_device_type newType;
+            error = clGetDeviceInfo( otherDevices[ i ], CL_DEVICE_TYPE, sizeof( newType ), &newType, NULL );
+            if( error != CL_SUCCESS )
+            {
+                print_error( error, "Unable to get device type for other device" );
+                free(otherDevices);
+                return NULL;
+            }
+            cl_device_id result = otherDevices[ i ];
+            free(otherDevices);
+            return result;
+        }
+    }
+
+    // Should never get here
+    free(otherDevices);
+    return NULL;
+}
+
+
--- a/test_conformance/compatibility/test_common/harness/testHarness.h
+++ b/test_conformance/compatibility/test_common/harness/testHarness.h
@@ -0,0 +1,100 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef _testHarness_h
+#define _testHarness_h
+
+#include "threadTesting.h"
+#include "clImageHelper.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern cl_uint gReSeed;
+extern cl_uint gRandomSeed;
+
+// Supply a list of functions to test here. This will allocate a CL device, create a context, all that
+// setup work, and then call each function in turn as dictated by the passed arguments.
+extern int runTestHarness( int argc, const char *argv[], unsigned int num_fns,
+                            basefn fnList[], const char *fnNames[],
+                            int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps );
+
+// Device checking function. See runTestHarnessWithCheck. If this function returns anything other than CL_SUCCESS (0), the harness exits.
+typedef int (*DeviceCheckFn)( cl_device_id device );
+
+// Same as runTestHarness, but also supplies a function that checks the created device for required functionality.
+extern int runTestHarnessWithCheck( int argc, const char *argv[], unsigned int num_fns,
+                              basefn fnList[], const char *fnNames[],
+                              int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps, DeviceCheckFn deviceCheckFn );
+
+// The command line parser used by runTestHarness to break up parameters into calls to callTestFunctions
+extern int parseAndCallCommandLineTests( int argc, const char *argv[], cl_device_id device, unsigned int num_fns,
+                                        basefn *fnList, const char *fnNames[],
+                                        int forceNoContextCreation, cl_command_queue_properties queueProps, int num_elements );
+
+// Call this function if you need to do all the setup work yourself, and just need the function list called/
+// managed.
+//    functionList is the actual array of functions
+//    functionNames is an array of strings representing the name of each function
+//    functionsToCall is an array of flags (treated as bools) which tell which function is to be called,
+//       each element at index i, corresponds to the element in functionList at index i
+//    numFunctions is the number of elements in the arrays
+//    forceNoContextCreation, if non-zero, means no testing context or queue is created for each test
+//    queueProps are used to create the testing command queue for each test
+//    deviceToUse and numElementsToUse are all just passed to each test function
+extern int callTestFunctions( basefn functionList[], const char *functionNames[], unsigned char functionsToCall[],
+                              int numFunctions, cl_device_id deviceToUse, int forceNoContextCreation,
+                              int numElementsToUse, cl_command_queue_properties queueProps );
+
+// This function is called by callTestFunctions, once per function, to do setup, call, logging and cleanup
+extern int callSingleTestFunction( basefn functionToCall, const char *functionName,
+                                   cl_device_id deviceToUse, int forceNoContextCreation,
+                                   int numElementsToUse, cl_command_queue_properties queueProps );
+
+///// Miscellaneous steps
+
+// Given a pre-existing device type choice, check the environment for an override, then print what
+// choice was made and how (and return the overridden choice, if there is one)
+extern void checkDeviceTypeOverride( cl_device_type *inOutType );
+
+// standard callback function for context pfn_notify
+extern void CL_CALLBACK notify_callback(const char *errinfo, const void *private_info, size_t cb, void *user_data);
+
+extern cl_device_type GetDeviceType( cl_device_id );
+
+// Given a device (most likely passed in by the harness, but not required), will attempt to find
+// a DIFFERENT device and return it. Useful for finding another device to run multi-device tests against.
+// Note that returning NULL means an error was hit, but if no error was hit and the device passed in
+// is the only device available, the SAME device is returned, so check!
+extern cl_device_id GetOpposingDevice( cl_device_id device );
+
+
+// Device capability flags shared with the tests; only declared here.
+extern int      gFlushDenormsToZero;    // This is set to 1 if the device does not support denorms (CL_FP_DENORM)
+extern int      gInfNanSupport;         // This is set to 1 if the device supports infinities and NaNs
+extern int        gIsEmbedded;            // This is set to 1 if the device is an embedded device
+extern int        gHasLong;               // This is set to 1 if the device supports long and ulong types in OpenCL C.
+extern int      gIsOpenCL_C_1_0_Device; // This is set to 1 if the device supports only OpenCL C 1.0.
+
+#if ! defined( __APPLE__ )
+    void     memset_pattern4(void *, const void *, size_t);
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // _testHarness_h
+
+
--- a/test_conformance/compatibility/test_common/harness/test_mt19937.c
+++ b/test_conformance/compatibility/test_common/harness/test_mt19937.c
@@ -0,0 +1,51 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "mt19937.h"
+#include <stdio.h>
+
+// Regression check for the mt19937 generator: seeds it with 42, draws 65536 values and compares
+// every 4096th draw (i = 0, 4096, ...) against a table of 16 reference values.
+// Returns 0 when every sampled value matches and 1 otherwise, so callers and scripts can
+// detect a failure from the exit status.
+int main( void )
+{
+    MTdata d = init_genrand(42);
+    int i;
+    const cl_uint reference[16] = { 0x5fe1dc66, 0x8b255210, 0x0380b0c8, 0xc87d2ce4,
+                                    0x55c31f24, 0x8bcd21ab, 0x14d5fef5, 0x9416d2b6,
+                                    0xdf875de9, 0x00517d76, 0xd861c944, 0xa7676404,
+                                    0x5491aff4, 0x67616209, 0xc368b3fb, 0x929dfc92 };
+    int errcount = 0;
+
+    for( i = 0; i < 65536; i++ )
+    {
+        cl_uint u = genrand_int32( d );
+        /* Only every 4096th draw has a reference value; i>>12 is its index in the table */
+        if( 0 == (i & 4095) )
+        {
+            if( u != reference[i>>12] )
+            {
+                printf("ERROR: expected *0x%8.8x at %d.  Got 0x%8.8x\n", reference[i>>12], i, u );
+                errcount++;
+            }
+        }
+    }
+
+    free_mtdata(d);
+
+    if( errcount )
+        printf("mt19937 test failed.\n");
+    else
+        printf("mt19937 test passed.\n");
+
+
+    return errcount ? 1 : 0;
+}
--- a/test_conformance/compatibility/test_common/harness/threadTesting.c
+++ b/test_conformance/compatibility/test_common/harness/threadTesting.c
@@ -0,0 +1,106 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "threadTesting.h"
+#include "errorHelpers.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+#if !defined(_WIN32)
+#include <stdbool.h>
+#endif
+
+#include <math.h>
+#include <string.h>
+
+#if !defined(_WIN32)
+#include <pthread.h>
+#endif
+
+#if 0 // Disabled for now
+
+// Argument bundle handed to the test thread through pthread_create's void* parameter.
+typedef struct
+{
+    basefn            mFunction;       // test entry point to invoke
+    cl_device_id    mDevice;         // device for the test
+    cl_context        mContext;        // context supplied by the caller
+    int                mNumElements;    // element count forwarded to the test
+} TestFnArgs;
+
+////////////////////////////////////////////////////////////////////////////////
+// Thread-based testing. Spawns a new thread to run the given test function,
+// then waits for it to complete. The entire idea is that, if the thread crashes,
+// we can catch it and report it as a failure instead of crashing the entire suite
+////////////////////////////////////////////////////////////////////////////////
+
+// Thread entry point: unpacks the TestFnArgs passed as data, creates a context, runs the test
+// function, releases the context and returns the test's result cast to void*.
+// Returns (void *)(-1) if the context cannot be created.
+// NOTE(review): this code is inside "#if 0" and looks stale relative to the declarations
+// around it - see the notes on individual lines before re-enabling.
+void *test_thread_wrapper( void *data )
+{
+    TestFnArgs *args;
+    int retVal;
+    cl_context context;
+
+    args = (TestFnArgs *)data;
+
+    /* Create a new context to use (contexts can't cross threads) */
+    // NOTE(review): TestFnArgs as declared in this file has no mDeviceGroup member, and this
+    // two-argument call differs from the six-argument clCreateContext call used in testHarness.c.
+    context = clCreateContext(NULL, args->mDeviceGroup);
+    if( context == NULL )
+    {
+        log_error("clCreateContext failed for new thread\n");
+        return (void *)(-1);
+    }
+
+    /* Call function */
+    // NOTE(review): basefn is declared as (cl_device_id, cl_context, cl_command_queue, int) in
+    // threadTesting.h; the arguments passed here do not match that signature.
+    retVal = args->mFunction( args->mDeviceGroup, args->mDevice, context, args->mNumElements );
+
+    clReleaseContext( context );
+
+    // The int result is carried back to the joiner inside the thread's void* return value
+    return (void *)retVal;
+}
+
+// Runs fnToTest on a freshly created pthread and blocks until that thread finishes.
+// Returns the value produced by the thread, or -1 if the thread cannot be created or joined.
+// NOTE(review): this code is inside "#if 0" and looks stale - see the notes below before
+// re-enabling. The queue parameter is not used in the body.
+int test_threaded_function( basefn fnToTest, cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
+{
+    int error;
+    pthread_t threadHdl;
+    void *retVal;
+    TestFnArgs args;
+
+
+    args.mFunction = fnToTest;
+    // NOTE(review): neither a deviceGroup variable nor an mDeviceGroup member is declared
+    // anywhere in this file.
+    args.mDeviceGroup = deviceGroup;
+    args.mDevice = device;
+    args.mContext = context;
+    args.mNumElements = numElements;
+
+
+    // args lives on this stack frame; that is safe only because the thread is joined below
+    // before this function returns
+    error = pthread_create( &threadHdl, NULL, test_thread_wrapper, (void *)&args );
+    if( error != 0 )
+    {
+        log_error( "ERROR: Unable to create thread for testing!\n" );
+        return -1;
+    }
+
+    /* Thread has been started, now just wait for it to complete (or crash) */
+    error = pthread_join( threadHdl, &retVal );
+    if( error != 0 )
+    {
+        log_error( "ERROR: Unable to join testing thread!\n" );
+        return -1;
+    }
+
+    // Recover the int that the thread wrapper packed into its void* return value
+    return (int)((intptr_t)retVal);
+}
+#endif
+
+
--- a/test_conformance/compatibility/test_common/harness/threadTesting.h
+++ b/test_conformance/compatibility/test_common/harness/threadTesting.h
@@ -0,0 +1,32 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef _threadTesting_h
+#define _threadTesting_h
+
+#ifdef __APPLE__
+    #include <OpenCL/opencl.h>
+#else
+    #include <CL/opencl.h>
+#endif
+
+// Return value a test function can use to tell the harness it is not implemented yet.
+#define TEST_NOT_IMPLEMENTED        -99
+
+// Signature shared by the test entry points the harness calls.
+typedef int (*basefn)(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+// NOTE(review): the definition of this function in threadTesting.c is inside an "#if 0" block -
+// confirm a definition is actually built before calling it.
+extern int test_threaded_function( basefn fnToTest, cl_device_id device, cl_context context, cl_command_queue queue, int numElements );
+
+#endif // _threadTesting_h
+
+
--- a/test_conformance/compatibility/test_common/harness/typeWrappers.cpp
+++ b/test_conformance/compatibility/test_common/harness/typeWrappers.cpp
@@ -0,0 +1,481 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "typeWrappers.h"
+#include "kernelHelpers.h"
+#include "errorHelpers.h"
+#include <stdlib.h>
+#include "clImageHelper.h"
+
+#define ROUND_SIZE_UP( _size, _align )      (((size_t)(_size) + (size_t)(_align) - 1) & -((size_t)(_align)))
+
+#if defined( __APPLE__ )
+    #define kPageSize       4096
+    #include <sys/mman.h>
+    #include <stdlib.h>
+#elif defined(__linux__)
+    #include <unistd.h>
+    #define kPageSize  (getpagesize())
+#endif
+
+// Constructs a protected 1D image by delegating to Create(); the resulting status is written
+// to errcode_ret when the caller supplies one.
+clProtectedImage::clProtectedImage( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, cl_int *errcode_ret )
+{
+    const cl_int status = Create( context, mem_flags, fmt, width );
+
+    if( errcode_ret )
+    {
+        *errcode_ret = status;
+    }
+}
+
+// Creates the 1D image held by this object.
+// On Apple platforms, when none of the context's devices is a GPU, the image uses an mmap'd
+// host buffer (CL_MEM_USE_HOST_PTR) laid out as: 4 PROT_NONE guard rows, the image row followed
+// by one PROT_NONE guard page, then 4 more PROT_NONE guard rows. In every other case a plain
+// image is created and backingStore is set to NULL.
+// Returns the error code reported by image creation, or CL_OUT_OF_HOST_MEMORY when the backing
+// store cannot be mapped.
+cl_int clProtectedImage::Create( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width )
+{
+    cl_int error;
+#if defined( __APPLE__ )
+    int protect_pages = 1;
+    cl_device_id devices[16];
+    size_t number_of_devices;
+    error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices), devices, &number_of_devices);
+    test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed");
+
+    // clGetContextInfo reports a size in bytes; convert it to a device count
+    number_of_devices /= sizeof(cl_device_id);
+    for (int i=0; i<(int)number_of_devices; i++) {
+        cl_device_type type;
+        error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type, NULL);
+        test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed");
+        if (type == CL_DEVICE_TYPE_GPU) {
+            protect_pages = 0;
+            break;
+        }
+    }
+
+    if (protect_pages) {
+        size_t pixelBytes = get_pixel_bytes(fmt);
+        size_t rowBytes = ROUND_SIZE_UP( width * pixelBytes, kPageSize );
+        size_t rowStride = rowBytes + kPageSize;
+
+        // create backing store
+        backingStoreSize = rowStride + 8 * rowStride;
+        backingStore = mmap(0, backingStoreSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0);
+        if( backingStore == MAP_FAILED )
+        {
+            // mmap signals failure with MAP_FAILED, not NULL; never mprotect or hand that
+            // address to OpenCL
+            log_error( "ERROR: Unable to mmap %lu bytes for the protected image backing store\n", (unsigned long) backingStoreSize );
+            backingStore = NULL;
+            image = NULL;
+            return CL_OUT_OF_HOST_MEMORY;
+        }
+
+        // add guard pages
+        size_t row;
+        char *p = (char*) backingStore;
+        char *imagePtr = (char*) backingStore + 4 * rowStride;
+        for( row = 0; row < 4; row++ )
+        {
+            mprotect( p, rowStride, PROT_NONE );    p += rowStride;
+        }
+        p += rowBytes;
+        mprotect( p, kPageSize, PROT_NONE );        p += rowStride;
+        p -= rowBytes;
+        for( row = 0; row < 4; row++ )
+        {
+            mprotect( p, rowStride, PROT_NONE );    p += rowStride;
+        }
+
+        if(  getenv( "CL_ALIGN_RIGHT" ) )
+        {
+            static int spewEnv = 1;
+            if(spewEnv)
+            {
+                log_info( "***CL_ALIGN_RIGHT is set. Aligning images at right edge of page\n" );
+                spewEnv = 0;
+            }
+            // Shift the row so its last pixel ends right at the guard page
+            imagePtr += rowBytes - pixelBytes * width;
+        }
+
+        image = create_image_1d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, rowStride, imagePtr, NULL, &error );
+    } else {
+        backingStore = NULL;
+        image = create_image_1d( context, mem_flags, fmt, width, 0, NULL, NULL, &error );
+
+    }
+#else
+
+    backingStore = NULL;
+    image = create_image_1d( context, mem_flags, fmt, width, 0, NULL, NULL, &error );
+
+#endif
+    return error;
+}
+
+
+// Constructs a protected 2D image by delegating to Create(); the resulting status is written
+// to errcode_ret when the caller supplies one.
+clProtectedImage::clProtectedImage( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, cl_int *errcode_ret )
+{
+    const cl_int status = Create( context, mem_flags, fmt, width, height );
+
+    if( errcode_ret )
+    {
+        *errcode_ret = status;
+    }
+}
+
+// Creates the 2D image held by this object.
+// On Apple platforms, when none of the context's devices is a GPU, the image uses an mmap'd
+// host buffer (CL_MEM_USE_HOST_PTR) laid out as: 4 PROT_NONE guard rows, then height image rows
+// each followed by one PROT_NONE guard page, then 4 more PROT_NONE guard rows. In every other
+// case a plain image is created and backingStore is set to NULL.
+// Returns the error code reported by image creation, or CL_OUT_OF_HOST_MEMORY when the backing
+// store cannot be mapped.
+cl_int clProtectedImage::Create( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height )
+{
+    cl_int error;
+#if defined( __APPLE__ )
+    int protect_pages = 1;
+    cl_device_id devices[16];
+    size_t number_of_devices;
+    error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices), devices, &number_of_devices);
+    test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed");
+
+    // clGetContextInfo reports a size in bytes; convert it to a device count
+    number_of_devices /= sizeof(cl_device_id);
+    for (int i=0; i<(int)number_of_devices; i++) {
+        cl_device_type type;
+        error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type, NULL);
+        test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed");
+        if (type == CL_DEVICE_TYPE_GPU) {
+            protect_pages = 0;
+            break;
+        }
+    }
+
+    if (protect_pages) {
+        size_t pixelBytes = get_pixel_bytes(fmt);
+        size_t rowBytes = ROUND_SIZE_UP( width * pixelBytes, kPageSize );
+        size_t rowStride = rowBytes + kPageSize;
+
+        // create backing store
+        backingStoreSize = height * rowStride + 8 * rowStride;
+        backingStore = mmap(0, backingStoreSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0);
+        if( backingStore == MAP_FAILED )
+        {
+            // mmap signals failure with MAP_FAILED, not NULL; never mprotect or hand that
+            // address to OpenCL
+            log_error( "ERROR: Unable to mmap %lu bytes for the protected image backing store\n", (unsigned long) backingStoreSize );
+            backingStore = NULL;
+            image = NULL;
+            return CL_OUT_OF_HOST_MEMORY;
+        }
+
+        // add guard pages
+        size_t row;
+        char *p = (char*) backingStore;
+        char *imagePtr = (char*) backingStore + 4 * rowStride;
+        for( row = 0; row < 4; row++ )
+        {
+            mprotect( p, rowStride, PROT_NONE );    p += rowStride;
+        }
+        p += rowBytes;
+        for( row = 0; row < height; row++ )
+        {
+            mprotect( p, kPageSize, PROT_NONE );    p += rowStride;
+        }
+        p -= rowBytes;
+        for( row = 0; row < 4; row++ )
+        {
+            mprotect( p, rowStride, PROT_NONE );    p += rowStride;
+        }
+
+        if(  getenv( "CL_ALIGN_RIGHT" ) )
+        {
+            static int spewEnv = 1;
+            if(spewEnv)
+            {
+                log_info( "***CL_ALIGN_RIGHT is set. Aligning images at right edge of page\n" );
+                spewEnv = 0;
+            }
+            // Shift the rows so each row's last pixel ends right at its guard page
+            imagePtr += rowBytes - pixelBytes * width;
+        }
+
+        image = create_image_2d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, rowStride, imagePtr, &error );
+    } else {
+        backingStore = NULL;
+        image = create_image_2d( context, mem_flags, fmt, width, height, 0, NULL, &error );
+
+    }
+#else
+
+    backingStore = NULL;
+    image = create_image_2d( context, mem_flags, fmt, width, height, 0, NULL, &error );
+
+#endif
+    return error;
+}
+
+// Constructs a protected 3D image by delegating to Create(); the resulting status is written
+// to errcode_ret when the caller supplies one.
+clProtectedImage::clProtectedImage( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, cl_int *errcode_ret )
+{
+    const cl_int status = Create( context, mem_flags, fmt, width, height, depth );
+
+    if( errcode_ret )
+    {
+        *errcode_ret = status;
+    }
+}
+
+// Creates the 3D image held by this object.
+// On Apple platforms, when none of the context's devices is a GPU, the image uses an mmap'd
+// host buffer (CL_MEM_USE_HOST_PTR) laid out as: 4 PROT_NONE guard rows, then height*depth image
+// rows each followed by one PROT_NONE guard page, then 4 more PROT_NONE guard rows. In every
+// other case a plain image is created and backingStore is set to NULL.
+// Returns the error code reported by image creation, or CL_OUT_OF_HOST_MEMORY when the backing
+// store cannot be mapped.
+cl_int clProtectedImage::Create( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth )
+{
+    cl_int error;
+
+#if defined( __APPLE__ )
+    int protect_pages = 1;
+    cl_device_id devices[16];
+    size_t number_of_devices;
+    error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices), devices, &number_of_devices);
+    test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed");
+
+    // clGetContextInfo reports a size in bytes; convert it to a device count
+    number_of_devices /= sizeof(cl_device_id);
+    for (int i=0; i<(int)number_of_devices; i++) {
+        cl_device_type type;
+        error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type, NULL);
+        test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed");
+        if (type == CL_DEVICE_TYPE_GPU) {
+            protect_pages = 0;
+            break;
+        }
+    }
+
+    if (protect_pages) {
+        size_t pixelBytes = get_pixel_bytes(fmt);
+        size_t rowBytes = ROUND_SIZE_UP( width * pixelBytes, kPageSize );
+        size_t rowStride = rowBytes + kPageSize;
+
+        // create backing store
+        backingStoreSize = height * depth * rowStride + 8 * rowStride;
+        backingStore = mmap(0, backingStoreSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0);
+        if( backingStore == MAP_FAILED )
+        {
+            // mmap signals failure with MAP_FAILED, not NULL; never mprotect or hand that
+            // address to OpenCL
+            log_error( "ERROR: Unable to mmap %lu bytes for the protected image backing store\n", (unsigned long) backingStoreSize );
+            backingStore = NULL;
+            image = NULL;
+            return CL_OUT_OF_HOST_MEMORY;
+        }
+
+        // add guard pages
+        size_t row;
+        char *p = (char*) backingStore;
+        char *imagePtr = (char*) backingStore + 4 * rowStride;
+        for( row = 0; row < 4; row++ )
+        {
+            mprotect( p, rowStride, PROT_NONE );    p += rowStride;
+        }
+        p += rowBytes;
+        for( row = 0; row < height*depth; row++ )
+        {
+            mprotect( p, kPageSize, PROT_NONE );    p += rowStride;
+        }
+        p -= rowBytes;
+        for( row = 0; row < 4; row++ )
+        {
+            mprotect( p, rowStride, PROT_NONE );    p += rowStride;
+        }
+
+        if(  getenv( "CL_ALIGN_RIGHT" ) )
+        {
+            static int spewEnv = 1;
+            if(spewEnv)
+            {
+                log_info( "***CL_ALIGN_RIGHT is set. Aligning images at right edge of page\n" );
+                spewEnv = 0;
+            }
+            // Shift the rows so each row's last pixel ends right at its guard page
+            imagePtr += rowBytes - pixelBytes * width;
+        }
+
+        image = create_image_3d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, depth, rowStride, height*rowStride, imagePtr, &error );
+    } else {
+        backingStore = NULL;
+        image = create_image_3d( context, mem_flags, fmt, width, height, depth, 0, 0, NULL, &error );
+    }
+#else
+
+    backingStore = NULL;
+    image = create_image_3d( context, mem_flags, fmt, width, height, depth, 0, 0, NULL, &error );
+
+#endif
+
+    return error;
+}
+
+
+// Constructs a protected image of the requested imageType by delegating to Create(); the
+// resulting status is written to errcode_ret when the caller supplies one.
+clProtectedImage::clProtectedImage( cl_context context, cl_mem_object_type imageType, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, size_t arraySize, cl_int *errcode_ret )
+{
+    const cl_int status = Create( context, imageType, mem_flags, fmt, width, height, depth, arraySize );
+
+    if( errcode_ret )
+    {
+        *errcode_ret = status;
+    }
+}
+
+// Creates the image, optionally on a guard-page protected host backing store.
+// On Apple platforms, when the context holds no GPU device, the image is created
+// with CL_MEM_USE_HOST_PTR over an mmap'ed region in which each row is followed
+// by a PROT_NONE page and the image is bracketed by four protected row strides.
+// Otherwise a plain image is created and backingStore is NULL.
+// Returns the OpenCL error of the failing call, CL_INVALID_VALUE for an image
+// type not handled here, or CL_OUT_OF_HOST_MEMORY when the mapping fails.
+cl_int clProtectedImage::Create( cl_context context, cl_mem_object_type imageType, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, size_t arraySize )
+{
+    cl_int error;
+#if defined( __APPLE__ )
+    int protect_pages = 1;
+    cl_device_id devices[16];
+    size_t number_of_devices;
+    error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices), devices, &number_of_devices);
+    test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed");
+    // The query reports a size in bytes; turn it into a device count.
+    number_of_devices /= sizeof(cl_device_id);
+    for (int i=0; i<(int)number_of_devices; i++) {
+        cl_device_type type;
+        error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type, NULL);
+        test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed");
+        if (type == CL_DEVICE_TYPE_GPU) {
+            protect_pages = 0;
+            break;
+        }
+    }
+
+    if (protect_pages) {
+        size_t pixelBytes = get_pixel_bytes(fmt);
+        size_t rowBytes = ROUND_SIZE_UP( width * pixelBytes, kPageSize );
+        size_t rowStride = rowBytes + kPageSize;
+        size_t mapSize;
+
+        // size the backing store: all image rows plus 4 guard strides before and after
+        switch (imageType)
+        {
+            case CL_MEM_OBJECT_IMAGE1D:
+                mapSize = rowStride + 8 * rowStride;
+                break;
+            case CL_MEM_OBJECT_IMAGE2D:
+                mapSize = height * rowStride + 8 * rowStride;
+                break;
+            case CL_MEM_OBJECT_IMAGE3D:
+                mapSize = height * depth * rowStride + 8 * rowStride;
+                break;
+            case CL_MEM_OBJECT_IMAGE1D_ARRAY:
+                mapSize = arraySize * rowStride + 8 * rowStride;
+                break;
+            case CL_MEM_OBJECT_IMAGE2D_ARRAY:
+                mapSize = height * arraySize * rowStride + 8 * rowStride;
+                break;
+            default:
+                return CL_INVALID_VALUE;
+        }
+        // Never store MAP_FAILED in backingStore: the destructor would try to unmap it.
+        void *mapping = mmap(0, mapSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0);
+        if( mapping == MAP_FAILED )
+        {
+            log_error( "ERROR: mmap of %lu bytes failed in clProtectedImage.\n", (unsigned long)mapSize );
+            backingStore = NULL;
+            return CL_OUT_OF_HOST_MEMORY;
+        }
+        backingStore = mapping;
+        backingStoreSize = mapSize;
+
+        // add guard pages
+        size_t row;
+        char *p = (char*) backingStore;
+        char *imagePtr = (char*) backingStore + 4 * rowStride;
+        for( row = 0; row < 4; row++ )
+        {
+            mprotect( p, rowStride, PROT_NONE );    p += rowStride;
+        }
+        p += rowBytes;
+        size_t sz = (height > 0 ? height : 1) * (depth > 0 ? depth : 1) * (arraySize > 0 ? arraySize : 1);
+        for( row = 0; row < sz; row++ )
+        {
+            mprotect( p, kPageSize, PROT_NONE );    p += rowStride;
+        }
+        p -= rowBytes;
+        for( row = 0; row < 4; row++ )
+        {
+            mprotect( p, rowStride, PROT_NONE );    p += rowStride;
+        }
+
+        if(  getenv( "CL_ALIGN_RIGHT" ) )
+        {
+            static int spewEnv = 1;
+            if(spewEnv)
+            {
+                log_info( "***CL_ALIGN_RIGHT is set. Aligning images at right edge of page\n" );
+                spewEnv = 0;
+            }
+            imagePtr += rowBytes - pixelBytes * width;
+        }
+
+        switch (imageType)
+        {
+            case CL_MEM_OBJECT_IMAGE1D:
+                image = create_image_1d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, rowStride, imagePtr, NULL, &error );
+                break;
+            case CL_MEM_OBJECT_IMAGE2D:
+                image = create_image_2d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, rowStride, imagePtr, &error );
+                break;
+            case CL_MEM_OBJECT_IMAGE3D:
+                image = create_image_3d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, depth, rowStride, height*rowStride, imagePtr, &error );
+                break;
+            case CL_MEM_OBJECT_IMAGE1D_ARRAY:
+                image = create_image_1d_array( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, arraySize, rowStride, rowStride, imagePtr, &error );
+                break;
+            case CL_MEM_OBJECT_IMAGE2D_ARRAY:
+                image = create_image_2d_array( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, arraySize, rowStride, height*rowStride, imagePtr, &error );
+                break;
+        }
+        return error;
+    }
+#endif
+    // Unprotected path (non-Apple builds, or a GPU device is present): no host pointer.
+    backingStore = NULL;
+    switch (imageType)
+    {
+        case CL_MEM_OBJECT_IMAGE1D:
+            image = create_image_1d( context, mem_flags, fmt, width, 0, NULL, NULL, &error );
+            break;
+        case CL_MEM_OBJECT_IMAGE2D:
+            image = create_image_2d( context, mem_flags, fmt, width, height, 0, NULL, &error );
+            break;
+        case CL_MEM_OBJECT_IMAGE3D:
+            image = create_image_3d( context, mem_flags, fmt, width, height, depth, 0, 0, NULL, &error );
+            break;
+        case CL_MEM_OBJECT_IMAGE1D_ARRAY:
+            image = create_image_1d_array( context, mem_flags, fmt, width, arraySize, 0, 0, NULL, &error );
+            break;
+        case CL_MEM_OBJECT_IMAGE2D_ARRAY:
+            image = create_image_2d_array( context, mem_flags, fmt, width, height, arraySize, 0, 0, NULL, &error );
+            break;
+        default:
+            return CL_INVALID_VALUE;
+    }
+    return error;
+}
+
+
+
+/*******
+ * clProtectedArray implementation
+ *******/
+// Default construction leaves the array without any buffer.
+clProtectedArray::clProtectedArray()
+    : mBuffer( NULL ), mValidBuffer( NULL )
+{}
+
+clProtectedArray::clProtectedArray( size_t sizeInBytes )
+    : mBuffer( NULL ), mValidBuffer( NULL )
+{
+    Allocate( sizeInBytes ); // start empty, then allocate the requested storage right away
+}
+
+// Gives back the buffer, if any: munmap on Apple (a failure is only logged), free elsewhere.
+clProtectedArray::~clProtectedArray()
+{
+    if( mBuffer == NULL ) return;
+#if defined( __APPLE__ )
+    int error = munmap( mBuffer, mRealSize );
+    if (error) log_error("WARNING: munmap failed in clProtectedArray.\n");
+#else
+    free( mBuffer );
+#endif
+}
+
+// Allocates sizeInBytes of storage. On Apple the usable region is page-rounded and fenced by
+// one PROT_NONE page on each side; if the mapping fails, both buffer pointers are left NULL.
+void clProtectedArray::Allocate( size_t sizeInBytes )
+{
+#if defined( __APPLE__ )
+    mRoundedSize = ROUND_SIZE_UP( sizeInBytes, kPageSize );
+    mRealSize = mRoundedSize + kPageSize * 2; // one guard page before and one after
+    // Use mmap here to ensure we start on a page boundary, so the mprotect calls will work OK
+    mBuffer = (char *)mmap(0, mRealSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0);
+    if( mBuffer == (char *)MAP_FAILED )
+    {
+        log_error("ERROR: mmap failed in clProtectedArray.\n");
+        mBuffer = mValidBuffer = NULL; // keep MAP_FAILED away from the destructor's munmap
+        return;
+    }
+    mValidBuffer = mBuffer + kPageSize;
+    mprotect( mBuffer, kPageSize, PROT_NONE );                     // leading guard page
+    mprotect( mValidBuffer + mRoundedSize, kPageSize, PROT_NONE ); // trailing guard page
+#else
+    mRoundedSize = mRealSize = sizeInBytes;
+    mBuffer = mValidBuffer = (char *)calloc(1, mRealSize);
+#endif
+}
+
+
--- a/test_conformance/compatibility/test_common/harness/typeWrappers.h
+++ b/test_conformance/compatibility/test_common/harness/typeWrappers.h
@@ -0,0 +1,333 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef _typeWrappers_h
+#define _typeWrappers_h
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#if !defined(_WIN32)
+#include <sys/mman.h>
+#endif
+
+#include "compat.h"
+#include <stdio.h>
+#include "mt19937.h"
+#include "errorHelpers.h"
+#include "kernelHelpers.h"
+
+extern "C" cl_uint gReSeed;
+extern "C" cl_uint gRandomSeed;
+
+/* cl_context wrapper */
+
+class clContextWrapper
+{
+    public:
+        // An empty wrapper holds NULL; a non-NULL handle is released on destruction.
+        clContextWrapper() : mContext( NULL ) {}
+        clContextWrapper( cl_context context ) : mContext( context ) {}
+        ~clContextWrapper() { if( mContext ) clReleaseContext( mContext ); }
+
+        // Assignment overwrites the stored handle without releasing the previous one.
+        clContextWrapper & operator=( const cl_context &rhs ) { mContext = rhs; return *this; }
+        operator cl_context() { return mContext; }
+
+        // Address-of yields a pointer to the stored handle, not to the wrapper object.
+        cl_context * operator&() { return &mContext; }
+        bool operator==( const cl_context &rhs ) { return rhs == mContext; }
+    protected:
+        cl_context mContext;
+};
+
+/* cl_program wrapper */
+
+class clProgramWrapper
+{
+    public:
+        // An empty wrapper holds NULL; a non-NULL handle is released on destruction.
+        clProgramWrapper() : mProgram( NULL ) {}
+        clProgramWrapper( cl_program program ) : mProgram( program ) {}
+        ~clProgramWrapper() { if( mProgram ) clReleaseProgram( mProgram ); }
+
+        // Assignment overwrites the stored handle without releasing the previous one.
+        clProgramWrapper & operator=( const cl_program &rhs ) { mProgram = rhs; return *this; }
+        operator cl_program() { return mProgram; }
+
+        // Address-of yields a pointer to the stored handle, not to the wrapper object.
+        cl_program * operator&() { return &mProgram; }
+        bool operator==( const cl_program &rhs ) { return rhs == mProgram; }
+    protected:
+        cl_program mProgram;
+};
+
+/* cl_kernel wrapper */
+
+class clKernelWrapper
+{
+    public:
+        // An empty wrapper holds NULL; a non-NULL handle is released on destruction.
+        clKernelWrapper() : mKernel( NULL ) {}
+        clKernelWrapper( cl_kernel kernel ) : mKernel( kernel ) {}
+        ~clKernelWrapper() { if( mKernel ) clReleaseKernel( mKernel ); }
+
+        // Assignment overwrites the stored handle without releasing the previous one.
+        clKernelWrapper & operator=( const cl_kernel &rhs ) { mKernel = rhs; return *this; }
+        operator cl_kernel() { return mKernel; }
+
+        // Address-of yields a pointer to the stored handle, not to the wrapper object.
+        cl_kernel * operator&() { return &mKernel; }
+        bool operator==( const cl_kernel &rhs ) { return rhs == mKernel; }
+    protected:
+        cl_kernel mKernel;
+};
+
+/* cl_mem (stream) wrapper */
+
+class clMemWrapper
+{
+    public:
+        // An empty wrapper holds NULL; a non-NULL handle is released on destruction.
+        clMemWrapper() : mMem( NULL ) {}
+        clMemWrapper( cl_mem mem ) : mMem( mem ) {}
+        ~clMemWrapper() { if( mMem ) clReleaseMemObject( mMem ); }
+
+        // Assignment overwrites the stored handle without releasing the previous one.
+        clMemWrapper & operator=( const cl_mem &rhs ) { mMem = rhs; return *this; }
+        operator cl_mem() { return mMem; }
+
+        // Address-of yields a pointer to the stored handle, not to the wrapper object.
+        cl_mem * operator&() { return &mMem; }
+        bool operator==( const cl_mem &rhs ) { return rhs == mMem; }
+    protected:
+        cl_mem mMem;
+};
+
+// Image wrapper whose pixels may live on a guard-page protected host backing
+// store (set up by Create() on Apple platforms); both are freed on destruction.
+class clProtectedImage
+{
+    public:
+        clProtectedImage() : backingStore( NULL ), backingStoreSize( 0 ), image( NULL ) {}
+        clProtectedImage( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, cl_int *errcode_ret );
+        clProtectedImage( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, cl_int *errcode_ret );
+        clProtectedImage( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, cl_int *errcode_ret );
+        clProtectedImage( cl_context context, cl_mem_object_type imageType, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, size_t arraySize, cl_int *errcode_ret );
+        ~clProtectedImage()
+        {
+            if( image != NULL )
+                clReleaseMemObject( image );
+#if defined( __APPLE__ )
+            // Unmap after releasing the image; report a failure like the other wrappers do.
+            if( backingStore != NULL && munmap( backingStore, backingStoreSize ) != 0 )
+                log_error( "WARNING: munmap failed in clProtectedImage.\n" );
+#endif
+        }
+
+        cl_int Create( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width );
+        cl_int Create( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height );
+        cl_int Create( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth );
+        cl_int Create( cl_context context, cl_mem_object_type imageType, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, size_t arraySize );
+
+        // Adopts a plain cl_mem; the previous image and backing store are NOT released or unmapped.
+        clProtectedImage & operator=( const cl_mem &rhs ) { image = rhs; backingStore = NULL; return *this; }
+        operator cl_mem() { return image; }
+        cl_mem * operator&() { return &image; }
+        bool operator==( const cl_mem &rhs ) { return image == rhs; }
+    protected:
+        void *backingStore;      // start of the mmap'ed region, NULL when unused
+        size_t backingStoreSize; // size passed to munmap; meaningful only with a backing store
+        cl_mem  image;
+};
+
+/* cl_command_queue wrapper */
+
+class clCommandQueueWrapper
+{
+    public:
+        // Destruction finishes a non-NULL queue (a clFinish failure is only printed), then releases it.
+        clCommandQueueWrapper() : mMem( NULL ) {}
+        clCommandQueueWrapper( cl_command_queue queue ) : mMem( queue ) {}
+        ~clCommandQueueWrapper() { if( mMem ) { int error = clFinish(mMem); if (error) print_error(error, "clFinish failed"); clReleaseCommandQueue( mMem ); } }
+
+        // Assignment overwrites the stored handle without releasing the previous one.
+        clCommandQueueWrapper & operator=( const cl_command_queue &rhs ) { mMem = rhs; return *this; }
+        operator cl_command_queue() { return mMem; }
+
+        // Address-of yields a pointer to the stored handle, not to the wrapper object.
+        cl_command_queue * operator&() { return &mMem; }
+        bool operator==( const cl_command_queue &rhs ) { return rhs == mMem; }
+    protected:
+        cl_command_queue mMem;
+};
+
+/* cl_sampler wrapper */
+class clSamplerWrapper
+{
+    public:
+        // An empty wrapper holds NULL; a non-NULL handle is released on destruction.
+        clSamplerWrapper() : mMem( NULL ) {}
+        clSamplerWrapper( cl_sampler sampler ) : mMem( sampler ) {}
+        ~clSamplerWrapper() { if( mMem ) clReleaseSampler( mMem ); }
+
+        // Assignment overwrites the stored handle without releasing the previous one.
+        clSamplerWrapper & operator=( const cl_sampler &rhs ) { mMem = rhs; return *this; }
+        operator cl_sampler() { return mMem; }
+
+        // Address-of yields a pointer to the stored handle, not to the wrapper object.
+        cl_sampler * operator&() { return &mMem; }
+        bool operator==( const cl_sampler &rhs ) { return rhs == mMem; }
+    protected:
+        cl_sampler mMem;
+};
+
+/* cl_event wrapper */
+class clEventWrapper
+{
+    public:
+        // An empty wrapper holds NULL; a non-NULL handle is released on destruction.
+        clEventWrapper() : mMem( NULL ) {}
+        clEventWrapper( cl_event event ) : mMem( event ) {}
+        ~clEventWrapper() { if( mMem ) clReleaseEvent( mMem ); }
+
+        // Assignment overwrites the stored handle without releasing the previous one.
+        clEventWrapper & operator=( const cl_event &rhs ) { mMem = rhs; return *this; }
+        operator cl_event() { return mMem; }
+
+        // Address-of yields a pointer to the stored handle, not to the wrapper object.
+        cl_event * operator&() { return &mMem; }
+        bool operator==( const cl_event &rhs ) { return rhs == mMem; }
+    protected:
+        cl_event mMem;
+};
+
+/* Generic protected memory buffer, for verifying access within bounds */
+class clProtectedArray
+{
+    public:
+        clProtectedArray();
+        clProtectedArray( size_t sizeInBytes );
+        virtual ~clProtectedArray();
+        void Allocate( size_t sizeInBytes ); // obtains the storage; guard pages on Apple only
+
+        // Both conversions expose the usable region rather than the raw allocation start.
+        operator void *() { return static_cast<void *>( mValidBuffer ); }
+        operator const void *() const { return static_cast<const void *>( mValidBuffer ); }
+
+    protected:
+        char  *mBuffer;      // start of the raw allocation
+        char  *mValidBuffer; // start of the usable bytes
+        size_t mRealSize;    // bytes actually allocated
+        size_t mRoundedSize; // usable size (rounded up to whole pages on Apple)
+};
+
+class RandomSeed // owns an MTdata generator state created from a seed
+{
+    public:
+        RandomSeed( cl_uint seed ) {
+            if( seed != 0 ) log_info( "(seed = %10.10u) ", seed );
+            mtData = init_genrand( seed );
+        }
+        ~RandomSeed()
+        {
+            if( gReSeed != 0 ) gRandomSeed = genrand_int32( mtData ); // store one more draw as the global seed
+            free_mtdata( mtData );
+        }
+        operator MTdata() { return mtData; }
+    protected:
+        MTdata mtData;
+};
+
+// Owning pointer for a test buffer that lives either on the heap (plain or
+// aligned allocation) or inside an mmap'ed region. Copying is disabled.
+template <typename T> class BufferOwningPtr
+{
+    BufferOwningPtr(BufferOwningPtr const &); // do not implement
+    void operator=(BufferOwningPtr const &);  // do not implement
+    void *ptr;
+    void *map;
+    size_t mapsize;   // Bytes allocated total, pointed to by map.
+    size_t allocsize; // Bytes allocated in unprotected pages, pointed to by ptr.
+    bool aligned;
+
+    // Frees ptr with the deallocator selected by the aligned flag.
+    void freeUnmapped()
+    {
+        if( aligned ) align_free(ptr); else free(ptr);
+    }
+
+    // A mapped buffer is only handled on Apple builds; elsewhere it is fatal.
+    static void rejectMappedBuffer(void *m)
+    {
+#if ! defined( __APPLE__ )
+        if(m)
+        {
+            log_error( "ERROR: unhandled code path. BufferOwningPtr allocated with mapped buffer!" );
+            abort();
+        }
+#endif
+        (void)m;
+    }
+
+  public:
+    explicit BufferOwningPtr(void *p = 0) : ptr(p), map(0), mapsize(0), allocsize(0), aligned(false) {}
+    explicit BufferOwningPtr(void *p, void *m, size_t s)
+        : ptr(p), map(m), mapsize(s), allocsize(0), aligned(false)
+    {
+        rejectMappedBuffer(m);
+    }
+    ~BufferOwningPtr()
+    {
+        if( !map )
+        {
+            freeUnmapped();
+            return;
+        }
+#if defined( __APPLE__ )
+        int error = munmap(map, mapsize);
+        if (error) log_error("WARNING: munmap failed in BufferOwningPtr.\n");
+#endif
+    }
+
+    // Releases the buffer currently held, then takes ownership of the new one.
+    void reset(void *p, void *m = 0, size_t mapsize_ = 0, size_t allocsize_ = 0, bool aligned_ = false)
+    {
+        if( !map )
+            freeUnmapped();
+        else
+        {
+#if defined( __APPLE__ )
+            int error = munmap(map, mapsize);
+            if (error) log_error("WARNING: munmap failed in BufferOwningPtr.\n");
+#else
+            log_error( "ERROR: unhandled code path. BufferOwningPtr reset with mapped buffer!" );
+            abort();
+#endif
+        }
+        ptr = p;
+        map = m;
+        mapsize = mapsize_;
+        allocsize = allocsize_;
+        aligned = aligned_;
+        rejectMappedBuffer(m);
+    }
+    operator T*() { return (T*)ptr; }
+    size_t getSize() const { return allocsize; }
+};
+
+#endif // _typeWrappers_h
+
+
--- a/test_conformance/compatibility/test_conformance/CMakeLists.txt
+++ b/test_conformance/compatibility/test_conformance/CMakeLists.txt
@@ -0,0 +1,2 @@
+add_subdirectory(api)
+add_subdirectory(basic)
--- a/test_conformance/compatibility/test_conformance/api/CMakeLists.txt
+++ b/test_conformance/compatibility/test_conformance/api/CMakeLists.txt
@@ -0,0 +1,34 @@
+set(MODULE_NAME COMPATIBILITY_API)
+# Sources of this module: the API tests followed by shared files from test_common/harness.
+set(${MODULE_NAME}_SOURCES
+         main.c
+         test_bool.c
+         test_retain.cpp
+         test_retain_program.c
+         test_queries.cpp
+         test_create_kernels.c
+         test_kernels.c
+         test_api_min_max.c
+         test_kernel_arg_changes.cpp
+         test_kernel_arg_multi_setup.cpp
+         test_binary.cpp
+         test_native_kernel.cpp
+         test_mem_objects.cpp
+         test_create_context_from_type.cpp
+         test_device_min_data_type_align_size_alignment.cpp
+         test_platform.cpp
+         test_mem_object_info.cpp
+         test_null_buffer_arg.c
+         test_kernel_arg_info.c
+         ../../test_common/harness/errorHelpers.c
+         ../../test_common/harness/threadTesting.c
+         ../../test_common/harness/testHarness.c
+         ../../test_common/harness/kernelHelpers.c
+         ../../test_common/harness/typeWrappers.cpp
+         ../../test_common/harness/conversions.c
+         ../../test_common/harness/mt19937.c
+         ../../test_common/harness/msvc9.c
+         ../../test_common/harness/imageHelpers.cpp
+)
+
+include(../../../CMakeCommon.txt)
--- a/test_conformance/compatibility/test_conformance/api/Jamfile
+++ b/test_conformance/compatibility/test_conformance/api/Jamfile
@@ -0,0 +1,27 @@
+project
+    : requirements
+      <toolset>gcc:<cflags>-xc++
+      <toolset>msvc:<cflags>"/TP"
+    ;
+# The requirements above make both toolchains compile every source as C++ (-xc++, /TP).
+# NOTE(review): this list omits sources that api/CMakeLists.txt builds (e.g. test_retain.cpp, test_bool.c) - confirm.
+exe test_api
+    : main.c
+      test_api_min_max.c
+      test_binary.cpp
+      test_create_kernels.c
+      test_create_context_from_type.cpp
+      test_kernel_arg_changes.cpp
+      test_kernel_arg_multi_setup.cpp
+      test_kernels.c
+      test_native_kernel.cpp
+      test_queries.cpp
+      test_retain_program.c
+      test_platform.cpp 
+    ;
+
+install dist
+    : test_api #test.lst
+    : <variant>debug:<location>$(DIST)/debug/tests/test_conformance/api
+      <variant>release:<location>$(DIST)/release/tests/test_conformance/api
+    ;
--- a/test_conformance/compatibility/test_conformance/api/Makefile
+++ b/test_conformance/compatibility/test_conformance/api/Makefile
@@ -0,0 +1,61 @@
+ifdef BUILD_WITH_ATF
+ATF = -framework ATF
+USE_ATF = -DUSE_ATF
+endif
+# Test sources followed by the shared files from test_common/harness.
+SRCS = main.c \
+			test_retain_program.c \
+			test_queries.cpp \
+			test_create_kernels.c \
+			test_kernels.c \
+            test_kernel_arg_info.c \
+			test_api_min_max.c \
+			test_kernel_arg_changes.cpp \
+			test_kernel_arg_multi_setup.cpp \
+			test_binary.cpp \
+			test_native_kernel.cpp \
+			test_create_context_from_type.cpp \
+			test_platform.cpp \
+			test_retain.cpp \
+			test_device_min_data_type_align_size_alignment.cpp \
+			test_mem_objects.cpp \
+            test_bool.c \
+            test_null_buffer_arg.c \
+            test_mem_object_info.cpp \
+            ../../test_common/harness/errorHelpers.c \
+			../../test_common/harness/threadTesting.c \
+			../../test_common/harness/testHarness.c \
+			../../test_common/harness/imageHelpers.cpp \
+			../../test_common/harness/kernelHelpers.c \
+			../../test_common/harness/typeWrappers.cpp \
+			../../test_common/harness/mt19937.c \
+			../../test_common/harness/conversions.c
+		  
+DEFINES = DONT_TEST_GARBAGE_POINTERS
+
+SOURCES = $(abspath $(SRCS))
+LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries
+LIBPATH += -L.
+HEADERS = 
+TARGET = test_api
+INCLUDE = 
+COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32
+CC = c++
+CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
+CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
+LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF}
+# One object per source: first the .c suffixes, then the .cpp suffixes, are rewritten to .o.
+OBJECTS := ${SOURCES:.c=.o}
+OBJECTS := ${OBJECTS:.cpp=.o}
+
+TARGETOBJECT =
+all: $(TARGET)
+
+$(TARGET): $(OBJECTS)
+	$(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES)
+
+clean:
+	rm -f $(TARGET) $(OBJECTS)
+
+.DEFAULT:
+	@echo The target \"$@\" does not exist in Makefile.
--- a/test_conformance/compatibility/test_conformance/api/main.c
+++ b/test_conformance/compatibility/test_conformance/api/main.c
@@ -0,0 +1,214 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../../test_common/harness/compat.h"
+
+#include <stdio.h>
+#include <string.h>
+#include "procs.h"
+#include "../../test_common/harness/testHarness.h"
+
+#if !defined(_WIN32)
+#include <unistd.h>
+#endif
+
+// FIXME: To use certain functions in ../../test_common/harness/imageHelpers.h
+// (for example, generate_random_image_data()), the tests are required to declare
+// the following variables:
+cl_device_type gDeviceType = CL_DEVICE_TYPE_DEFAULT;
+bool gTestRounding = false;
+
+basefn    basefn_list[] = { // test entry points; presumably paired by index with basefn_names (only the lengths are cross-checked)
+    test_get_platform_info,
+    test_get_sampler_info,
+    test_get_command_queue_info,
+    test_get_context_info,
+    test_get_device_info,
+    test_enqueue_task,
+    test_binary_get,
+    test_program_binary_create,
+    test_kernel_required_group_size,
+
+    test_release_kernel_order,
+    test_release_during_execute,
+
+    test_load_single_kernel,
+    test_load_two_kernels,
+    test_load_two_kernels_in_one,
+    test_load_two_kernels_manually,
+    test_get_program_info_kernel_names,
+    test_get_kernel_arg_info,
+    test_create_kernels_in_program,
+    test_get_kernel_info,
+    test_execute_kernel_local_sizes,
+    test_set_kernel_arg_by_index,
+    test_set_kernel_arg_constant,
+    test_set_kernel_arg_struct_array,
+    test_kernel_global_constant,
+
+    test_min_max_thread_dimensions,
+    test_min_max_work_items_sizes,
+    test_min_max_work_group_size,
+    test_min_max_read_image_args,
+    test_min_max_write_image_args,
+    test_min_max_mem_alloc_size,
+    test_min_max_image_2d_width,
+    test_min_max_image_2d_height,
+    test_min_max_image_3d_width,
+    test_min_max_image_3d_height,
+    test_min_max_image_3d_depth,
+    test_min_max_image_array_size,
+    test_min_max_image_buffer_size,
+    test_min_max_parameter_size,
+    test_min_max_samplers,
+    test_min_max_constant_buffer_size,
+    test_min_max_constant_args,
+    test_min_max_compute_units,
+    test_min_max_address_bits,
+    test_min_max_single_fp_config,
+    test_min_max_double_fp_config,
+    test_min_max_local_mem_size,
+    test_min_max_kernel_preferred_work_group_size_multiple,
+    test_min_max_execution_capabilities,
+    test_min_max_queue_properties,
+    test_min_max_device_version,
+    test_min_max_language_version,
+
+    test_kernel_arg_changes,
+    test_kernel_arg_multi_setup_random,
+
+    test_native_kernel,
+
+    test_create_context_from_type,
+
+    test_platform_extensions,
+    test_get_platform_ids,
+    test_for_bool_type,
+
+    test_repeated_setup_cleanup,
+
+    test_retain_queue_single,
+    test_retain_queue_multiple,
+    test_retain_mem_object_single,
+    test_retain_mem_object_multiple,
+    test_min_data_type_align_size_alignment,
+
+    test_mem_object_destructor_callback,
+    test_null_buffer_arg,
+    test_get_buffer_info,
+    test_get_image2d_info,
+    test_get_image3d_info,
+    test_get_image1d_info,
+    test_get_image1d_array_info,
+    test_get_image2d_array_info,
+};
+
+
+const char    *basefn_names[] = { // test names, listed in the same order as the entries of basefn_list
+    "get_platform_info",
+    "get_sampler_info",
+    "get_command_queue_info",
+    "get_context_info",
+    "get_device_info",
+    "enqueue_task",
+    "binary_get",
+    "binary_create",
+    "kernel_required_group_size",
+
+    "release_kernel_order",
+    "release_during_execute",
+
+    "load_single_kernel",
+    "load_two_kernels",
+    "load_two_kernels_in_one",
+    "load_two_kernels_manually",
+    "get_program_info_kernel_names",
+    "get_kernel_arg_info",
+    "create_kernels_in_program",
+    "get_kernel_info",
+    "execute_kernel_local_sizes",
+    "set_kernel_arg_by_index",
+    "set_kernel_arg_constant",
+    "set_kernel_arg_struct_array",
+    "kernel_global_constant",
+
+    "min_max_thread_dimensions",
+    "min_max_work_items_sizes",
+    "min_max_work_group_size",
+    "min_max_read_image_args",
+    "min_max_write_image_args",
+    "min_max_mem_alloc_size",
+    "min_max_image_2d_width",
+    "min_max_image_2d_height",
+    "min_max_image_3d_width",
+    "min_max_image_3d_height",
+    "min_max_image_3d_depth",
+    "min_max_image_array_size",
+    "min_max_image_buffer_size",
+    "min_max_parameter_size",
+    "min_max_samplers",
+    "min_max_constant_buffer_size",
+    "min_max_constant_args",
+    "min_max_compute_units",
+    "min_max_address_bits",
+    "min_max_single_fp_config",
+    "min_max_double_fp_config",
+    "min_max_local_mem_size",
+    "min_max_kernel_preferred_work_group_size_multiple",
+    "min_max_execution_capabilities",
+    "min_max_queue_properties",
+    "min_max_device_version",
+    "min_max_language_version",
+
+    "kernel_arg_changes",
+    "kernel_arg_multi_setup_random",
+
+    "native_kernel",
+
+    "create_context_from_type",
+    "platform_extensions",
+
+    "get_platform_ids",
+    "bool_type",
+
+    "repeated_setup_cleanup",
+
+    "retain_queue_single",
+    "retain_queue_multiple",
+    "retain_mem_object_single",
+    "retain_mem_object_multiple",
+
+    "min_data_type_align_size_alignment",
+
+    "mem_object_destructor_callback",
+    "null_buffer_arg",
+    "get_buffer_info",
+    "get_image2d_info",
+    "get_image3d_info",
+    "get_image1d_info",
+    "get_image1d_array_info",
+    "get_image2d_array_info",
+};
+
+ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0])));
+
+int    num_fns = sizeof(basefn_names) / sizeof(basefn_names[0]); // number of entries in the name table
+// Entry point: hands the command line and both test tables to the shared harness.
+int main(int argc, const char *argv[])
+{
+    return runTestHarness( argc, argv, num_fns, basefn_list, basefn_names, false, false, 0 );
+}
+
+
--- a/test_conformance/compatibility/test_conformance/api/procs.h
+++ b/test_conformance/compatibility/test_conformance/api/procs.h
@@ -0,0 +1,108 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../../test_common/harness/errorHelpers.h"
+#include "../../test_common/harness/kernelHelpers.h"
+#include "../../test_common/harness/typeWrappers.h"
+#include "../../test_common/harness/clImageHelper.h"
+#include "../../test_common/harness/imageHelpers.h"
+extern float    calculate_ulperror(float a, float b);
+
+extern int        test_load_single_kernel(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_load_two_kernels(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_load_two_kernels_in_one(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_load_two_kernels_manually(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_get_program_info_kernel_names( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_create_kernels_in_program(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_enqueue_task(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_repeated_setup_cleanup(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+
+extern int      test_for_bool_type(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_platform_extensions(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_get_platform_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_get_sampler_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_get_command_queue_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_get_context_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_get_device_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_kernel_required_group_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+
+extern int        test_binary_get(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_program_binary_create(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+
+extern int        test_release_kernel_order(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_release_during_execute(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+
+extern int        test_get_kernel_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_execute_kernel_local_sizes(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_set_kernel_arg_by_index(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_set_kernel_arg_struct(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_set_kernel_arg_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_set_kernel_arg_struct_array(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_kernel_global_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+
+extern int        test_min_max_thread_dimensions(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_min_max_work_items_sizes(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_min_max_work_group_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_min_max_read_image_args(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_min_max_write_image_args(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_min_max_mem_alloc_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_min_max_image_2d_width(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_min_max_image_2d_height(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_min_max_image_3d_width(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_min_max_image_3d_height(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_min_max_image_3d_depth(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_min_max_image_array_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_min_max_image_buffer_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_min_max_parameter_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_min_max_samplers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_min_max_constant_buffer_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_min_max_constant_args(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_min_max_compute_units(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_min_max_address_bits(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_min_max_single_fp_config(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_min_max_double_fp_config(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_min_max_local_mem_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_min_max_kernel_preferred_work_group_size_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_min_max_execution_capabilities(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_min_max_queue_properties(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_min_max_device_version(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_min_max_language_version(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+
+extern int        test_native_kernel(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
+
+extern int      test_create_context_from_type(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+
+extern int      test_get_platform_ids(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+
+extern int        test_kernel_arg_changes(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_kernel_arg_multi_setup_random(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+
+extern int        test_retain_queue_single(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_retain_queue_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_retain_mem_object_single(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_retain_mem_object_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_min_data_type_align_size_alignment(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
+
+extern int        test_mem_object_destructor_callback(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+
+extern int      test_null_buffer_arg( cl_device_id device_id, cl_context context, cl_command_queue queue, int num_elements );
+extern int      test_get_buffer_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements );
+extern int      test_get_image2d_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements );
+extern int      test_get_image3d_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements );
+extern int      test_get_image1d_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements );
+extern int      test_get_image1d_array_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements );
+extern int      test_get_image2d_array_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements );
+extern int      test_get_kernel_arg_info( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
+
--- a/test_conformance/compatibility/test_conformance/api/testBase.h
+++ b/test_conformance/compatibility/test_conformance/api/testBase.h
@@ -0,0 +1,31 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef _testBase_h
+#define _testBase_h
+
+#include "../../test_common/harness/compat.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "procs.h"
+
+#endif // _testBase_h
+
+
+
--- a/test_conformance/compatibility/test_conformance/api/test_api_min_max.c
+++ b/test_conformance/compatibility/test_conformance/api/test_api_min_max.c
--- a/test_conformance/compatibility/test_conformance/api/test_binary.cpp
+++ b/test_conformance/compatibility/test_conformance/api/test_binary.cpp
@@ -0,0 +1,226 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "testBase.h"
+
+// OpenCL C source shared by the program-binary tests in this file.
+// It defines one kernel, sample_test: each work-item converts src[tid] to int,
+// adds one, and stores the result in dst[tid].
+static const char *sample_binary_kernel_source[] = {
+"__kernel void sample_test(__global float *src, __global int *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = (int)src[tid] + 1;\n"
+"\n"
+"}\n" };
+
+
+// Builds the sample program from source and checks that its device binary can
+// be queried through clGetProgramInfo.
+//
+// Steps:
+//   1. query CL_PROGRAM_BINARY_SIZES and require a non-zero size,
+//   2. query the size of the CL_PROGRAM_BINARIES result and require it to be
+//      exactly one pointer (the program is built for a single device),
+//   3. fetch the binary itself; its contents are not validated.
+//
+// Returns 0 on success. Failures either return -1 directly or leave through
+// the harness test_error() macro. queue and num_elements are not used.
+int test_binary_get(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    int error;
+    clProgramWrapper program;
+    size_t            binarySize;
+
+    // Owns the host copy of the binary so it is freed on every exit path,
+    // including the early returns taken by test_error() after the allocation.
+    struct BinaryStorage
+    {
+        unsigned char *data;
+        BinaryStorage() : data( NULL ) {}
+        ~BinaryStorage() { free( data ); }
+    } storage;
+
+    program = clCreateProgramWithSource( context, 1, sample_binary_kernel_source, NULL, &error );
+    test_error( error, "Unable to create program from source" );
+
+    // Build so we have a binary to get
+    error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
+    test_error( error, "Unable to build test program" );
+
+    // Get the size of the resulting binary (only one device)
+    error = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES, sizeof( binarySize ), &binarySize, NULL );
+    test_error( error, "Unable to get binary size" );
+
+    // Sanity check
+    if( binarySize == 0 )
+    {
+        log_error( "ERROR: Binary size of program is zero\n" );
+        return -1;
+    }
+
+    // Create a buffer to receive the actual binary
+    storage.data = (unsigned char*)malloc( sizeof( unsigned char ) * binarySize );
+    if( storage.data == NULL )
+    {
+        log_error( "ERROR: Unable to allocate host memory for the program binary\n" );
+        return -1;
+    }
+    unsigned char *buffers[ 1 ] = { storage.data };
+
+    // Do another sanity check here first: the query must report one pointer per device
+    size_t size;
+    error = clGetProgramInfo( program, CL_PROGRAM_BINARIES, 0, NULL, &size );
+    test_error( error, "Unable to get expected size of binaries array" );
+    if( size != sizeof( buffers ) )
+    {
+        log_error( "ERROR: Expected size of binaries array in clGetProgramInfo is incorrect (should be %d, got %d)\n", (int)sizeof( buffers ), (int)size );
+        return -1;
+    }
+
+    error = clGetProgramInfo( program, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL );
+    test_error( error, "Unable to get program binary" );
+
+    // No way to verify the binary is correct, so just be good with that
+    return 0;
+}
+
+
+// Round-trips a program through its device binary and checks that the
+// reloaded program behaves like the one built from source.
+//
+// Steps:
+//   1. build sample_binary_kernel_source and fetch its binary,
+//   2. create and build a second program from that binary (with a
+//      binary_status array) and fetch its binary again,
+//   3. create and build a third program from the second binary, passing NULL
+//      for binary_status, and fetch its binary as well,
+//   4. run "sample_test" from the source program and from the first
+//      binary-created program on the same 1000-element input and require
+//      byte-identical output.
+//
+// Returns 0 on success. Failures either return -1 directly or leave through
+// the harness test_error() macro. num_elements is not used.
+int test_program_binary_create(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    /* To test this in a self-contained fashion, we have to create a program with
+       source, then get the binary, then use that binary to reload the program, and then verify */
+
+    int error;
+    clProgramWrapper program, program_from_binary;
+    size_t            binarySize;
+
+    // Tracks every host allocation made below and frees them all when the
+    // function returns, so the early returns taken by test_error() and by the
+    // explicit failure checks do not leak.
+    struct HostAllocations
+    {
+        enum { kMaxBlocks = 6 };
+        void  *blocks[ kMaxBlocks ];
+        size_t count;
+
+        HostAllocations() : count( 0 ) {}
+        ~HostAllocations()
+        {
+            while( count > 0 )
+                free( blocks[ --count ] );
+        }
+
+        // Returns a tracked allocation of `bytes` bytes, or NULL on failure.
+        void *alloc( size_t bytes )
+        {
+            if( count == (size_t)kMaxBlocks )
+                return NULL;
+            void *block = malloc( bytes );
+            if( block != NULL )
+                blocks[ count++ ] = block;
+            return block;
+        }
+    } host;
+
+    program = clCreateProgramWithSource( context, 1, sample_binary_kernel_source, NULL, &error );
+    test_error( error, "Unable to create program from source" );
+
+    // Build so we have a binary to get
+    error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
+    test_error( error, "Unable to build test program" );
+
+    // Get the size of the resulting binary (only one device)
+    error = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES, sizeof( binarySize ), &binarySize, NULL );
+    test_error( error, "Unable to get binary size" );
+
+    // Sanity check
+    if( binarySize == 0 )
+    {
+        log_error( "ERROR: Binary size of program is zero\n" );
+        return -1;
+    }
+
+    // Create a buffer and get the actual binary
+    unsigned char *binary = (unsigned char*)host.alloc( binarySize );
+    if( binary == NULL )
+    {
+        log_error( "ERROR: Unable to allocate host memory for the program binary\n" );
+        return -1;
+    }
+    const unsigned char *buffers[ 1 ] = { binary };
+
+    error = clGetProgramInfo( program, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL );
+    test_error( error, "Unable to get program binary" );
+
+    cl_int loadErrors[ 1 ];
+    program_from_binary = clCreateProgramWithBinary( context, 1, &deviceID, &binarySize, buffers, loadErrors, &error );
+    test_error( error, "Unable to load valid program binary" );
+    test_error( loadErrors[ 0 ], "Unable to load valid device binary into program" );
+
+    error = clBuildProgram( program_from_binary, 1, &deviceID, NULL, NULL, NULL );
+    test_error( error, "Unable to build binary program" );
+
+    // Get the size of the binary built from the first binary
+    size_t binary2Size;
+    error = clGetProgramInfo( program_from_binary, CL_PROGRAM_BINARY_SIZES, sizeof( binary2Size ), &binary2Size, NULL );
+    test_error( error, "Unable to get size for the binary program" );
+    if( binary2Size == 0 )
+    {
+        log_error( "ERROR: Binary size of program created from binary is zero\n" );
+        return -1;
+    }
+
+    // Now get the binary one more time from the program that was created from a binary
+    unsigned char *binary2 = (unsigned char*)host.alloc( binary2Size );
+    if( binary2 == NULL )
+    {
+        log_error( "ERROR: Unable to allocate host memory for the second program binary\n" );
+        return -1;
+    }
+    buffers[ 0 ] = binary2;
+    error = clGetProgramInfo( program_from_binary, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL );
+    test_error( error, "Unable to get program binary second time" );
+
+    // Try again, this time without passing the status ptr in, to make sure we still
+    // get a valid binary
+    clProgramWrapper programWithoutStatus = clCreateProgramWithBinary( context, 1, &deviceID, &binary2Size, buffers, NULL, &error );
+    test_error( error, "Unable to load valid program binary when binary_status pointer is NULL" );
+
+    error = clBuildProgram( programWithoutStatus, 1, &deviceID, NULL, NULL, NULL );
+    test_error( error, "Unable to build binary program created without binary_status" );
+
+    // Get the size of the binary created without passing binary_status
+    size_t binary3Size;
+    error = clGetProgramInfo( programWithoutStatus, CL_PROGRAM_BINARY_SIZES, sizeof( binary3Size ), &binary3Size, NULL );
+    test_error( error, "Unable to get size for the binary program created without binary_status" );
+    if( binary3Size == 0 )
+    {
+        log_error( "ERROR: Binary size of program created without binary_status is zero\n" );
+        return -1;
+    }
+
+    // Now get the binary one more time
+    unsigned char *binary3 = (unsigned char*)host.alloc( binary3Size );
+    if( binary3 == NULL )
+    {
+        log_error( "ERROR: Unable to allocate host memory for the third program binary\n" );
+        return -1;
+    }
+    buffers[ 0 ] = binary3;
+    error = clGetProgramInfo( programWithoutStatus, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL );
+    test_error( error, "Unable to get program binary from the program created without binary_status" );
+
+    // Now execute them both to see that they both do the same thing.
+    clMemWrapper in, out, out_binary;
+    clKernelWrapper kernel, kernel_binary;
+    cl_int *out_data, *out_data_binary;
+    cl_float *in_data;
+    size_t size_to_run = 1000;
+
+    // Allocate some data
+    in_data = (cl_float*)host.alloc( sizeof(cl_float)*size_to_run );
+    out_data = (cl_int*)host.alloc( sizeof(cl_int)*size_to_run );
+    out_data_binary = (cl_int*)host.alloc( sizeof(cl_int)*size_to_run );
+    if( in_data == NULL || out_data == NULL || out_data_binary == NULL )
+    {
+        log_error( "ERROR: Unable to allocate host memory for kernel input/output data\n" );
+        return -1;
+    }
+    memset(out_data, 0, sizeof(cl_int)*size_to_run);
+    memset(out_data_binary, 0, sizeof(cl_int)*size_to_run);
+    for (size_t i=0; i<size_to_run; i++)
+        in_data[i] = (cl_float)i;
+
+    // Create the buffers
+    in = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(cl_float)*size_to_run, in_data, &error);
+    test_error( error, "clCreateBuffer failed");
+    out = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(cl_int)*size_to_run, out_data, &error);
+    test_error( error, "clCreateBuffer failed");
+    out_binary = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(cl_int)*size_to_run, out_data_binary, &error);
+    test_error( error, "clCreateBuffer failed");
+
+    // Create the kernels
+    kernel = clCreateKernel(program, "sample_test", &error);
+    test_error( error, "clCreateKernel failed");
+    kernel_binary = clCreateKernel(program_from_binary, "sample_test", &error);
+    test_error( error, "clCreateKernel from binary failed");
+
+    // Set the arguments: both kernels read the same input but write to their own output
+    error = clSetKernelArg(kernel, 0, sizeof(in), &in);
+    test_error( error, "clSetKernelArg failed");
+    error = clSetKernelArg(kernel, 1, sizeof(out), &out);
+    test_error( error, "clSetKernelArg failed");
+    error = clSetKernelArg(kernel_binary, 0, sizeof(in), &in);
+    test_error( error, "clSetKernelArg failed");
+    error = clSetKernelArg(kernel_binary, 1, sizeof(out_binary), &out_binary);
+    test_error( error, "clSetKernelArg failed");
+
+    // Execute the kernels
+    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &size_to_run, NULL, 0, NULL, NULL);
+    test_error( error, "clEnqueueNDRangeKernel failed");
+    error = clEnqueueNDRangeKernel(queue, kernel_binary, 1, NULL, &size_to_run, NULL, 0, NULL, NULL);
+    test_error( error, "clEnqueueNDRangeKernel for binary kernel failed");
+
+    // Finish up
+    error = clFinish(queue);
+    test_error( error, "clFinish failed");
+
+    // Get the results back
+    error = clEnqueueReadBuffer(queue, out, CL_TRUE, 0, sizeof(cl_int)*size_to_run, out_data, 0, NULL, NULL);
+    test_error( error, "clEnqueueReadBuffer failed");
+    error = clEnqueueReadBuffer(queue, out_binary, CL_TRUE, 0, sizeof(cl_int)*size_to_run, out_data_binary, 0, NULL, NULL);
+    test_error( error, "clEnqueueReadBuffer failed");
+
+    // Compare the results
+    if( memcmp( out_data, out_data_binary, sizeof(cl_int)*size_to_run ) != 0 )
+    {
+        log_error( "ERROR: Results from executing binary and regular kernel differ.\n" );
+        return -1;
+    }
+
+    // All done! Host allocations are released by `host` on return.
+    return 0;
+}
+
+
--- a/test_conformance/compatibility/test_conformance/api/test_bool.c
+++ b/test_conformance/compatibility/test_conformance/api/test_bool.c
@@ -0,0 +1,52 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "testBase.h"
+#include "../../test_common/harness/testHarness.h"
+
+
+// OpenCL C source for a kernel that declares a local `bool`, assigns it the
+// result of a compound comparison, and branches on it. Used by
+// test_for_bool_type to check that such a kernel can be created.
+const char *kernel_with_bool[] = {
+    "__kernel void kernel_with_bool(__global float *src, __global int *dst)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "\n"
+    "    bool myBool = (src[tid] < 0.5f) && (src[tid] > -0.5f);\n"
+    "    if(myBool)\n"
+    "    {\n"
+    "        dst[tid] = (int)src[tid];\n"
+    "    }\n"
+    "    else\n"
+    "    {\n"
+    "        dst[tid] = 0;\n"
+    "    }\n"
+    "\n"
+    "}\n"
+};
+
+// Checks that a kernel using the OpenCL C `bool` type can be created.
+//
+// The program and kernel are created through create_single_kernel_helper()
+// from kernel_with_bool; the kernel is never enqueued. Returns the helper's
+// result (0 on success). deviceID, queue and num_elements are not used.
+int test_for_bool_type(cl_device_id deviceID, cl_context context,
+               cl_command_queue queue, int num_elements)
+{
+
+    cl_program program = NULL;
+    cl_kernel kernel = NULL;
+
+    int err = create_single_kernel_helper(context,
+                      &program,
+                      &kernel,
+                      1, kernel_with_bool,
+                      "kernel_with_bool" );
+
+    // On success this test owns both objects; release them so they do not leak.
+    // NOTE(review): on failure the helper is assumed to leave nothing for the
+    // caller to release - confirm against the harness implementation.
+    if( err == 0 )
+    {
+        clReleaseKernel( kernel );
+        clReleaseProgram( program );
+    }
+    return err;
+}
+
--- a/test_conformance/compatibility/test_conformance/api/test_create_context_from_type.cpp
+++ b/test_conformance/compatibility/test_conformance/api/test_create_context_from_type.cpp
@@ -0,0 +1,130 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "testBase.h"
+
+#ifndef _WIN32
+#include <unistd.h>
+#endif
+
+#include "../../test_common/harness/testHarness.h"
+#include "../../test_common/harness/conversions.h"
+
+extern cl_uint gRandomSeed;
+
+// Verifies that a context created with clCreateContextFromType is usable.
+//
+// The context is created for the type and platform of deviceID. A command
+// queue is then created on it, a float-to-int conversion kernel is built and
+// run over 10 random floats, and the device results are compared with the
+// same conversion done on the host.
+//
+// Returns 0 on success. Failures either return -1 directly or leave through
+// the harness test_error() macro. The incoming context, queue and
+// num_elements are not used.
+int test_create_context_from_type(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    int error;
+    clProgramWrapper program;
+    clKernelWrapper kernel;
+    clMemWrapper    streams[2];
+    clContextWrapper context_to_test;
+    clCommandQueueWrapper queue_to_test;
+    size_t    threads[1], localThreads[1];
+    cl_float inputData[10];
+    cl_int outputData[10];
+    int i;
+    RandomSeed seed( gRandomSeed );
+
+    const char *sample_single_test_kernel[] = {
+    "__kernel void sample_test(__global float *src, __global int *dst)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "\n"
+    "    dst[tid] = (int)src[tid];\n"
+    "\n"
+    "}\n" };
+
+    cl_device_type type;
+    error = clGetDeviceInfo(deviceID, CL_DEVICE_TYPE, sizeof(type), &type, NULL);
+    test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed\n");
+
+    cl_platform_id platform;
+    error = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM, sizeof(platform), &platform, NULL);
+    test_error(error, "clGetDeviceInfo for CL_DEVICE_PLATFORM failed\n");
+
+    // The property list is an array of integer-typed values terminated by 0
+    // (not a pointer, so NULL is the wrong spelling for the terminator).
+    cl_context_properties properties[3] = {
+      (cl_context_properties)CL_CONTEXT_PLATFORM,
+      (cl_context_properties)platform,
+      0
+    };
+
+    context_to_test = clCreateContextFromType(properties, type, notify_callback, NULL, &error);
+    test_error(error, "clCreateContextFromType failed");
+    if (context_to_test == NULL) {
+        log_error("clCreateContextFromType returned NULL, but error was CL_SUCCESS.\n");
+        return -1;
+    }
+
+    // Queue properties are a bitfield; 0 requests the default queue.
+    queue_to_test = clCreateCommandQueue(context_to_test, deviceID, 0, &error);
+    test_error(error, "clCreateCommandQueue failed");
+    if (queue_to_test == NULL) {
+        log_error("clCreateCommandQueue returned NULL, but error was CL_SUCCESS.\n");
+        return -1;
+    }
+
+    /* Create a kernel to test with */
+    if( create_single_kernel_helper( context_to_test, &program, &kernel, 1, sample_single_test_kernel, "sample_test" ) != 0 )
+    {
+        return -1;
+    }
+
+    /* Create some I/O streams */
+    streams[0] = clCreateBuffer(context_to_test, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * 10, NULL, &error);
+    test_error( error, "Creating test array failed" );
+    streams[1] = clCreateBuffer(context_to_test, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * 10, NULL, &error);
+    test_error( error, "Creating test array failed" );
+
+    /* Write some test data */
+    memset( outputData, 0, sizeof( outputData ) );
+
+    for (i=0; i<10; i++)
+        inputData[i] = get_random_float(-(float) 0x7fffffff, (float) 0x7fffffff, seed);
+
+    error = clEnqueueWriteBuffer(queue_to_test, streams[0], CL_TRUE, 0, sizeof(cl_float)*10, (void *)inputData, 0, NULL, NULL);
+    test_error( error, "Unable to set testing kernel data" );
+
+    /* Test setting the arguments by index manually */
+    error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]);
+    test_error( error, "Unable to set indexed kernel arguments" );
+    error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]);
+    test_error( error, "Unable to set indexed kernel arguments" );
+
+
+    /* Test running the kernel and verifying it */
+    threads[0] = (size_t)10;
+
+    error = get_max_common_work_group_size( context_to_test, kernel, threads[0], &localThreads[0] );
+    test_error( error, "Unable to get work group size to use" );
+
+    error = clEnqueueNDRangeKernel( queue_to_test, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
+    test_error( error, "Kernel execution failed" );
+
+    error = clEnqueueReadBuffer( queue_to_test, streams[1], CL_TRUE, 0, sizeof(cl_int)*10, (void *)outputData, 0, NULL, NULL );
+    test_error( error, "Unable to get result data" );
+
+    /* Compare against the same float-to-int conversion performed on the host */
+    for (i=0; i<10; i++)
+    {
+        if (outputData[i] != (int)inputData[i])
+        {
+            log_error( "ERROR: Data did not verify on first pass!\n" );
+            return -1;
+        }
+    }
+
+  return 0;
+}
+
+
--- a/test_conformance/compatibility/test_conformance/api/test_create_kernels.c
+++ b/test_conformance/compatibility/test_conformance/api/test_create_kernels.c
@@ -0,0 +1,643 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "testBase.h"
+#include "../../test_common/harness/testHarness.h"
+
+
+// Program source made of a single string defining one kernel, sample_test,
+// which stores (int)src[tid] in dst[tid]. Used by test_load_single_kernel.
+const char *sample_single_kernel[] = {
+    "__kernel void sample_test(__global float *src, __global int *dst)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "\n"
+    "    dst[tid] = (int)src[tid];\n"
+    "\n"
+    "}\n" };
+
+// Length of sample_single_kernel[0]; filled in by test_load_single_kernel
+// before the program is created.
+size_t sample_single_kernel_lengths[1];
+
+// Program source split across two strings, one kernel per string:
+// sample_test (float -> int) and sample_test2 (int -> float).
+// Used by test_load_two_kernels.
+const char *sample_two_kernels[] = {
+    "__kernel void sample_test(__global float *src, __global int *dst)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "\n"
+    "    dst[tid] = (int)src[tid];\n"
+    "\n"
+    "}\n",
+    "__kernel void sample_test2(__global int *src, __global float *dst)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "\n"
+    "    dst[tid] = (float)src[tid];\n"
+    "\n"
+    "}\n" };
+
+// Lengths of the two strings above; filled in by test_load_two_kernels
+// before the program is created.
+size_t sample_two_kernel_lengths[2];
+
+// Same two kernels as sample_two_kernels, but concatenated into a single
+// source string (note there is no comma between the two halves).
+// Used by test_load_two_kernels_in_one.
+const char *sample_two_kernels_in_1[] = {
+    "__kernel void sample_test(__global float *src, __global int *dst)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "\n"
+    "    dst[tid] = (int)src[tid];\n"
+    "\n"
+    "}\n"
+    "__kernel void sample_test2(__global int *src, __global float *dst)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "\n"
+    "    dst[tid] = (float)src[tid];\n"
+    "\n"
+    "}\n" };
+
+// Length of sample_two_kernels_in_1[0]; filled in by
+// test_load_two_kernels_in_one before the program is created.
+size_t sample_two_kernels_in_1_lengths[1];
+
+
+// Source for a kernel named test_kernel that writes src[gid] + 1 to dst[gid].
+// NOTE(review): "repeate" is presumably a misspelling of "repeat"; the name is
+// left as is because the symbol has external linkage and its users are not
+// visible in this part of the file.
+const char *repeate_test_kernel =
+"__kernel void test_kernel(__global int *src, __global int *dst)\n"
+"{\n"
+" dst[get_global_id(0)] = src[get_global_id(0)]+1;\n"
+"}\n";
+
+
+
+// Builds a one-kernel program and validates what clGetKernelInfo reports for
+// the kernel obtained from clCreateKernelsInProgram.
+//
+// Checked properties: the number of kernels created (1), CL_KERNEL_PROGRAM
+// and CL_KERNEL_CONTEXT (value and returned size), CL_KERNEL_NUM_ARGS
+// (returned size and value 2), and CL_KERNEL_FUNCTION_NAME ("sample_test",
+// with a returned size that includes the terminating NUL).
+//
+// Returns 0 on success. Failures either return -1 directly or leave through
+// the harness test_error() macro. queue and num_elements are not used.
+int test_load_single_kernel(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    int error;
+    clProgramWrapper program;
+    cl_program testProgram;
+    clKernelWrapper kernel;
+    cl_context testContext;
+    unsigned int numKernels;
+    cl_char testName[512];
+    cl_uint testArgCount;
+    size_t realSize;
+
+
+    /* Preprocess: calc the length of each source file line */
+    sample_single_kernel_lengths[ 0 ] = strlen( sample_single_kernel[ 0 ] );
+
+    /* Create a program */
+    program = clCreateProgramWithSource( context, 1, sample_single_kernel, sample_single_kernel_lengths, &error );
+    if( program == NULL || error != CL_SUCCESS )
+    {
+        print_error( error, "Unable to create single kernel program" );
+        return -1;
+    }
+
+    error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
+    test_error( error, "Unable to build single kernel program" );
+    error = clCreateKernelsInProgram(program, 1, &kernel, &numKernels);
+    test_error( error, "Unable to create single kernel program" );
+
+    /* The program defines exactly one kernel, so exactly one must be reported */
+    if( numKernels != 1 )
+    {
+        log_error( "ERROR: wrong # of kernels! (%d)\n", numKernels );
+        return -1;
+    }
+
+    /* Check program and context pointers */
+    error = clGetKernelInfo( kernel, CL_KERNEL_PROGRAM, sizeof( cl_program ), &testProgram, &realSize );
+    test_error( error, "Unable to get kernel's program" );
+    if( (cl_program)testProgram != (cl_program)program )
+    {
+        log_error( "ERROR: Returned kernel's program does not match program used to create it! (Got %p, expected %p)\n", (cl_program)testProgram, (cl_program)program );
+        return -1;
+    }
+    if( realSize != sizeof( cl_program ) )
+    {
+        log_error( "ERROR: Returned size of kernel's program does not match expected size (expected %d, got %d)\n", (int)sizeof( cl_program ), (int)realSize );
+        return -1;
+    }
+
+    error = clGetKernelInfo( kernel, CL_KERNEL_CONTEXT, sizeof( cl_context ), &testContext, &realSize );
+    test_error( error, "Unable to get kernel's context" );
+    if( (cl_context)testContext != (cl_context)context )
+    {
+        log_error( "ERROR: Returned kernel's context does not match program used to create it! (Got %p, expected %p)\n", (cl_context)testContext, (cl_context)context );
+        return -1;
+    }
+    if( realSize != sizeof( cl_context ) )
+    {
+        log_error( "ERROR: Returned size of kernel's context does not match expected size (expected %d, got %d)\n", (int)sizeof( cl_context ), (int)realSize );
+        return -1;
+    }
+
+    /* Test arg count: first the size-only query, then the value itself */
+    error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, 0, NULL, &realSize );
+    test_error( error, "Unable to get size of arg count info from kernel" );
+
+    if( realSize != sizeof( testArgCount ) )
+    {
+        log_error( "ERROR: size of arg count not valid! %d\n", (int)realSize );
+        return -1;
+    }
+
+    error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, sizeof( testArgCount ), &testArgCount, NULL );
+    test_error( error, "Unable to get arg count from kernel" );
+
+    if( testArgCount != 2 )
+    {
+        log_error( "ERROR: Kernel arg count does not match!\n" );
+        return -1;
+    }
+
+
+    /* Test function name */
+    error = clGetKernelInfo( kernel, CL_KERNEL_FUNCTION_NAME, sizeof( testName ), testName, &realSize );
+    test_error( error, "Unable to get name from kernel" );
+
+    if( strcmp( (char *)testName, "sample_test" ) != 0 )
+    {
+        log_error( "ERROR: Kernel names do not match!\n" );
+        return -1;
+    }
+    /* The reported size must count the terminating NUL as well */
+    if( realSize != strlen( (char *)testName ) + 1 )
+    {
+        log_error( "ERROR: Length of kernel name returned does not validate (expected %d, got %d)\n", (int)strlen( (char *)testName ) + 1, (int)realSize );
+        return -1;
+    }
+
+    /* All done */
+
+    return 0;
+}
+
+// Builds a program from two source strings (one kernel each) and checks that
+// clCreateKernelsInProgram returns both kernels.
+//
+// The two kernels may be returned in either order. The test requires that
+// the names "sample_test" and "sample_test2" each appear exactly once and
+// that the first returned kernel reports two arguments.
+//
+// Returns 0 on success. Failures either return -1 directly or leave through
+// the harness test_error() macro. queue and num_elements are not used.
+int test_load_two_kernels(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    int error;
+    clProgramWrapper program;
+    clKernelWrapper kernel[2];
+    unsigned int numKernels;
+    cl_char testName[ 512 ];
+    cl_uint testArgCount;
+
+
+    /* Preprocess: calc the length of each source file line */
+    sample_two_kernel_lengths[ 0 ] = strlen( sample_two_kernels[ 0 ] );
+    sample_two_kernel_lengths[ 1 ] = strlen( sample_two_kernels[ 1 ] );
+
+    /* Now create a test program */
+    program = clCreateProgramWithSource( context, 2, sample_two_kernels, sample_two_kernel_lengths, &error );
+    if( program == NULL || error != CL_SUCCESS )
+    {
+        print_error( error, "Unable to create dual kernel program!" );
+        return -1;
+    }
+    error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
+    test_error( error, "Unable to build dual kernel program" );
+    error = clCreateKernelsInProgram(program, 2, &kernel[0], &numKernels);
+    test_error( error, "Unable to create dual kernel program" );
+
+    if( numKernels != 2 )
+    {
+        log_error( "ERROR: wrong # of kernels! (%d)\n", numKernels );
+        return -1;
+    }
+
+    /* Check first kernel */
+    error = clGetKernelInfo( kernel[0], CL_KERNEL_FUNCTION_NAME, sizeof( testName ), testName, NULL );
+    test_error( error, "Unable to get function name from kernel" );
+
+    int found_kernel1 = 0, found_kernel2 = 0;
+
+    if( strcmp( (char *)testName, "sample_test" ) == 0 ) {
+        found_kernel1 = 1;
+    } else if( strcmp( (char *)testName, "sample_test2" ) == 0 ) {
+        found_kernel2 = 1;
+    } else {
+        log_error( "ERROR: Invalid kernel name returned: \"%s\" expected \"%s\" or \"%s\".\n", (char *)testName, "sample_test", "sample_test2");
+        return -1;
+    }
+
+    /* Check second kernel: it must be the one the first kernel was not */
+    error = clGetKernelInfo( kernel[1], CL_KERNEL_FUNCTION_NAME, sizeof( testName ), testName, NULL );
+    test_error( error, "Unable to get function name from second kernel" );
+
+    if( strcmp( (char *)testName, "sample_test" ) == 0 ) {
+        if (found_kernel1) {
+            log_error("Kernel \"%s\" returned twice.\n", (char *)testName);
+            return -1;
+        }
+        found_kernel1 = 1;
+    } else if( strcmp( (char *)testName, "sample_test2" ) == 0 ) {
+        if (found_kernel2) {
+            log_error("Kernel \"%s\" returned twice.\n", (char *)testName);
+            return -1;
+        }
+        found_kernel2 = 1;
+    } else {
+        log_error( "ERROR: Invalid kernel name returned: \"%s\" expected \"%s\" or \"%s\".\n", (char *)testName, "sample_test", "sample_test2");
+        return -1;
+    }
+
+    if( !found_kernel1 || !found_kernel2 )
+    {
+        log_error( "ERROR: Kernel names do not match.\n" );
+        if (!found_kernel1)
+            log_error("Kernel \"%s\" not returned.\n", "sample_test");
+        if (!found_kernel2)
+            log_error("Kernel \"%s\" not returned.\n", "sample_test2");
+        return -1;
+    }
+
+    error = clGetKernelInfo( kernel[0], CL_KERNEL_NUM_ARGS, sizeof( testArgCount ), &testArgCount, NULL );
+    test_error( error, "Unable to get arg count from kernel" );
+
+    if( testArgCount != 2 )
+    {
+        log_error( "ERROR: wrong # of args for kernel\n" );
+        return -1;
+    }
+
+    /* All done */
+    return 0;
+}
+
+/*
+ * Builds the two-kernel sample program, creates both kernels with a single
+ * clCreateKernelsInProgram call and verifies the result: exactly two kernels,
+ * named "sample_test" and "sample_test2" in either order with neither name
+ * repeated, and two arguments reported for the first kernel returned.
+ *
+ * queue and num_elements are not used by this test.
+ * Returns 0 on success and -1 on the explicit failure paths. test_error is a
+ * harness macro not shown here; it is assumed to leave the function when the
+ * status is not CL_SUCCESS.
+ */
+int test_load_two_kernels_in_one(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    int error;
+    clProgramWrapper program;
+    clKernelWrapper kernel[2];
+    unsigned int numKernels;
+    cl_char testName[512];
+    cl_uint testArgCount;
+
+
+    /* Preprocess: calc the length of each source file line */
+    sample_two_kernels_in_1_lengths[ 0 ] = strlen( sample_two_kernels_in_1[ 0 ] );
+
+    /* Now create a test program */
+    program = clCreateProgramWithSource( context, 1, sample_two_kernels_in_1, sample_two_kernels_in_1_lengths, &error );
+    if( program == NULL || error != CL_SUCCESS )
+    {
+        print_error( error, "Unable to create dual kernel program" );
+        return -1;
+    }
+    error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
+    test_error( error, "Unable to build dual kernel program" );
+    error = clCreateKernelsInProgram(program, 2, &kernel[0], &numKernels);
+    test_error( error, "Unable to create dual kernel program" );
+
+    if( numKernels != 2 )
+    {
+        /* numKernels is unsigned, so print it with %u */
+        log_error( "ERROR: wrong # of kernels! (%u)\n", numKernels );
+        return -1;
+    }
+
+    /* Check first kernel */
+    error = clGetKernelInfo( kernel[0], CL_KERNEL_FUNCTION_NAME, sizeof( testName ), testName, NULL );
+    test_error( error, "Unable to get function name from kernel" );
+
+    /* Track which of the two expected names have been seen so far; the order
+       in which the kernels come back is not assumed. */
+    int found_kernel1 = 0, found_kernel2 = 0;
+
+    if( strcmp( (char *)testName, "sample_test" ) == 0 ) {
+        found_kernel1 = 1;
+    } else if( strcmp( (char *)testName, "sample_test2" ) == 0 ) {
+        found_kernel2 = 1;
+    } else {
+        log_error( "ERROR: Invalid kernel name returned: \"%s\" expected \"%s\" or \"%s\".\n", testName, "sample_test", "sample_test2");
+        return -1;
+    }
+
+    error = clGetKernelInfo( kernel[0], CL_KERNEL_NUM_ARGS, sizeof( testArgCount ), &testArgCount, NULL );
+    test_error( error, "Unable to get arg count from kernel" );
+
+    if( testArgCount != 2 )
+    {
+        log_error( "ERROR: wrong # of args for kernel\n" );
+        return -1;
+    }
+
+    /* Check second kernel */
+    error = clGetKernelInfo( kernel[1], CL_KERNEL_FUNCTION_NAME, sizeof( testName ), testName, NULL );
+    test_error( error, "Unable to get function name from kernel" );
+
+    if( strcmp( (char *)testName, "sample_test" ) == 0 ) {
+        if (found_kernel1) {
+            log_error("Kernel \"%s\" returned twice.\n", (char *)testName);
+            return -1;
+        }
+        found_kernel1 = 1;
+    } else if( strcmp( (char *)testName, "sample_test2" ) == 0 ) {
+        if (found_kernel2) {
+            log_error("Kernel \"%s\" returned twice.\n", (char *)testName);
+            return -1;
+        }
+        found_kernel2 = 1;
+    } else {
+        log_error( "ERROR: Invalid kernel name returned: \"%s\" expected \"%s\" or \"%s\".\n", testName, "sample_test", "sample_test2");
+        return -1;
+    }
+
+    if( !found_kernel1 || !found_kernel2 )
+    {
+        log_error( "ERROR: Kernel names do not match.\n" );
+        if (!found_kernel1)
+            log_error("Kernel \"%s\" not returned.\n", "sample_test");
+        if (!found_kernel2)
+            log_error("Kernel \"%s\" not returned.\n", "sample_test2");
+        return -1;
+    }
+
+    /* All done */
+    return 0;
+}
+
+/*
+ * Builds the two-kernel sample program and creates each kernel individually
+ * by name with clCreateKernel, asking for "sample_test2" before "sample_test".
+ *
+ * queue and num_elements are not used by this test.
+ * Returns 0 when both kernels are created, -1 on the explicit failure paths.
+ * test_error is a harness macro not shown here; it is assumed to leave the
+ * function when the status is not CL_SUCCESS.
+ */
+int test_load_two_kernels_manually( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    clProgramWrapper program;
+    clKernelWrapper kernel1, kernel2;
+    int error;
+
+
+    /* Now create a test program */
+    program = clCreateProgramWithSource( context, 1, sample_two_kernels_in_1, NULL, &error );
+    if( program == NULL || error != CL_SUCCESS )
+    {
+        print_error( error, "Unable to create dual kernel program" );
+        return -1;
+    }
+
+    /* Compile the program */
+    error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
+    test_error( error, "Unable to build kernel program" );
+
+    /* Try manually creating kernels (backwards just in case) */
+    kernel1 = clCreateKernel( program, "sample_test2", &error );
+
+    if( kernel1 == NULL || error != CL_SUCCESS )
+    {
+        print_error( error, "Could not get kernel 1" );
+        return -1;
+    }
+
+    kernel2 = clCreateKernel( program, "sample_test", &error );
+
+    /* Check the status code as well as the handle, matching the kernel 1 check */
+    if( kernel2 == NULL || error != CL_SUCCESS )
+    {
+        print_error( error, "Could not get kernel 2" );
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * Builds the two-kernel sample program and checks the program-level kernel
+ * queries: CL_PROGRAM_NUM_KERNELS must report 2, and CL_PROGRAM_KERNEL_NAMES
+ * must be a NUL-terminated string equal to "sample_test;sample_test2" or
+ * "sample_test2;sample_test". Both kernels are then created by name.
+ *
+ * queue and num_elements are not used by this test.
+ * Returns 0 on success and -1 on the explicit failure paths. test_error is a
+ * harness macro not shown here; it is assumed to leave the function when the
+ * status is not CL_SUCCESS.
+ */
+int test_get_program_info_kernel_names( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    clProgramWrapper program;
+    clKernelWrapper kernel1, kernel2;
+    int error;
+    size_t i;
+
+    /* Now create a test program */
+    program = clCreateProgramWithSource( context, 1, sample_two_kernels_in_1, NULL, &error );
+    if( program == NULL || error != CL_SUCCESS )
+    {
+        print_error( error, "Unable to create dual kernel program" );
+        return -1;
+    }
+
+    /* Compile the program */
+    error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
+    test_error( error, "Unable to build kernel program" );
+
+    /* Lookup the number of kernels in the program. */
+    size_t total_kernels = 0;
+    error = clGetProgramInfo(program, CL_PROGRAM_NUM_KERNELS, sizeof(size_t),&total_kernels,NULL);
+    test_error( error, "Unable to get program info num kernels");
+
+    if (total_kernels != 2)
+    {
+        print_error( error, "Program did not contain two kernels" );
+        return -1;
+    }
+
+    /* Lookup the kernel names. Either ordering of the two names is accepted. */
+    const char* actual_names[] = { "sample_test;sample_test2", "sample_test2;sample_test"} ;
+    const size_t num_names = sizeof( actual_names ) / sizeof( actual_names[0] );
+
+    size_t kernel_names_len = 0;
+    error = clGetProgramInfo(program,CL_PROGRAM_KERNEL_NAMES,0,NULL,&kernel_names_len);
+    test_error( error, "Unable to get length of kernel names list." );
+
+    /* Both accepted strings have the same length, so comparing against the
+       first one (plus its terminator) covers both. */
+    if (kernel_names_len != (strlen(actual_names[0])+1))
+    {
+        print_error( error, "Kernel names length did not match");
+        return -1;
+    }
+
+    const size_t len = (kernel_names_len+1)*sizeof(char);
+    char* kernel_names = (char*)malloc(len);
+    if( kernel_names == NULL )
+    {
+        log_error( "ERROR: Unable to allocate %d bytes for the kernel names list.\n", (int)len );
+        return -1;
+    }
+
+    error = clGetProgramInfo(program,CL_PROGRAM_KERNEL_NAMES,len,kernel_names,&kernel_names_len);
+    if( error != CL_SUCCESS )
+    {
+        /* Release the buffer before leaving; it is not owned by a wrapper. */
+        print_error( error, "Unable to get kernel names list." );
+        free(kernel_names);
+        return -1;
+    }
+
+    /* Check to see if the kernel name array is null terminated. A reported
+       length of zero is rejected too, so the index below cannot wrap. */
+    if (kernel_names_len == 0 || kernel_names[kernel_names_len-1] != '\0')
+    {
+        free(kernel_names);
+        print_error( error, "Kernel name list was not null terminated");
+        return -1;
+    }
+
+    /* Check to see if the correct kernel name string was returned. */
+    for( i = 0; i < num_names; i++ )
+        if( 0 == strcmp(actual_names[i],kernel_names) )
+            break;
+
+    if (i == num_names )
+    {
+        /* Log while kernel_names is still valid, then free it. */
+        log_error( "Kernel names \"%s\" did not match:\n", kernel_names );
+        for( i = 0; i < num_names; i++ )
+            log_error( "\t\t\"%s\"\n", actual_names[i] );
+        free(kernel_names);
+        return -1;
+    }
+    free(kernel_names);
+
+    /* Try manually creating both kernels by name */
+    kernel1 = clCreateKernel( program, "sample_test", &error );
+    if( kernel1 == NULL || error != CL_SUCCESS )
+    {
+        print_error( error, "Could not get kernel 1" );
+        return -1;
+    }
+
+    kernel2 = clCreateKernel( program, "sample_test2", &error );
+    if( kernel2 == NULL || error != CL_SUCCESS )
+    {
+        print_error( error, "Could not get kernel 2" );
+        return -1;
+    }
+
+    return 0;
+}
+
+// OpenCL C source for the kernel used by test_enqueue_task: the work-item
+// writes tid + i into dst[i] for every i in [0, count).
+static const char *single_task_kernel[] = {
+    "__kernel void sample_test(__global int *dst, int count)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "\n"
+    "    for( int i = 0; i < count; i++ )\n"
+    "        dst[i] = tid + i;\n"
+    "\n"
+    "}\n" };
+
+/*
+ * Runs the single_task_kernel program once through clEnqueueTask and checks
+ * the output buffer. The validation expects results[i] == i for all 100
+ * entries, i.e. the kernel's "tid + i" with a global id of 0.
+ *
+ * deviceID and num_elements are not used by this test.
+ * Returns 0 on success and -1 on the explicit failure paths. test_error is a
+ * harness macro not shown here; it is assumed to leave the function when the
+ * status is not CL_SUCCESS.
+ */
+int test_enqueue_task(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    int error;
+    clProgramWrapper program;
+    clKernelWrapper kernel;
+    clMemWrapper output;
+    cl_int count;
+
+
+    if( create_single_kernel_helper( context, &program, &kernel, 1, single_task_kernel, "sample_test" ) )
+        return -1;
+
+    // Create args
+    count = 100;
+    output = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof( cl_int ) * count, NULL, &error );
+    test_error( error, "Unable to create output buffer" );
+
+    error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &output );
+    test_error( error, "Unable to set kernel argument" );
+    error = clSetKernelArg( kernel, 1, sizeof( cl_int ), &count );
+    test_error( error, "Unable to set kernel argument" );
+
+    // Run task
+    error = clEnqueueTask( queue, kernel, 0, NULL, NULL );
+    test_error( error, "Unable to run task" );
+
+    // Read results
+    cl_int *results = (cl_int*)malloc(sizeof(cl_int)*count);
+    if( results == NULL )
+    {
+        log_error( "ERROR: Unable to allocate host memory for results\n" );
+        return -1;
+    }
+
+    error = clEnqueueReadBuffer( queue, output, CL_TRUE, 0, sizeof( cl_int ) * count, results, 0, NULL, NULL );
+    if( error != CL_SUCCESS )
+    {
+        // results is a raw allocation, so free it before leaving
+        print_error( error, "Unable to read results" );
+        free(results);
+        return -1;
+    }
+
+    // Validate
+    for( cl_int i = 0; i < count; i++ )
+    {
+        if( results[ i ] != i )
+        {
+            log_error( "ERROR: Task result value %d did not validate! Expected %d, got %d\n", (int)i, (int)i, (int)results[ i ] );
+            free(results);
+            return -1;
+        }
+    }
+
+    /* All done */
+    free(results);
+    return 0;
+}
+
+
+
+#define TEST_SIZE 1000
+int test_repeated_setup_cleanup(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+
+    cl_context local_context;
+    cl_command_queue local_queue;
+    cl_program local_program;
+    cl_kernel local_kernel;
+    cl_mem local_mem_in, local_mem_out;
+    cl_event local_event;
+    size_t global_dim[3];
+    int i, j, error;
+    global_dim[0] = TEST_SIZE;
+    global_dim[1] = 1; global_dim[2] = 1;
+    cl_int *inData, *outData;
+    cl_int status;
+
+    inData = (cl_int*)malloc(sizeof(cl_int)*TEST_SIZE);
+    outData = (cl_int*)malloc(sizeof(cl_int)*TEST_SIZE);
+    for (i=0; i<TEST_SIZE; i++) {
+        inData[i] = i;
+    }
+
+
+    for (i=0; i<100; i++) {
+        memset(outData, 0, sizeof(cl_int)*TEST_SIZE);
+
+        local_context = clCreateContext(NULL, 1, &deviceID, notify_callback, NULL, &error);
+        test_error( error, "clCreateContext failed");
+
+        local_queue = clCreateCommandQueue(local_context, deviceID, 0, &error);
+        test_error( error, "clCreateCommandQueue failed");
+
+        local_program = clCreateProgramWithSource(local_context, 1, &repeate_test_kernel, NULL, &error);
+        test_error( error, "clCreateProgramWithSource failed");
+
+        error = clBuildProgram(local_program, 0, NULL, NULL, NULL, NULL);
+        test_error( error, "clBuildProgram failed");
+
+        local_kernel = clCreateKernel(local_program, "test_kernel", &error);
+        test_error( error, "clCreateKernel failed");
+
+        local_mem_in = clCreateBuffer(local_context, CL_MEM_READ_ONLY, TEST_SIZE*sizeof(cl_int), NULL, &error);
+        test_error( error, "clCreateBuffer failed");
+
+        local_mem_out = clCreateBuffer(local_context, CL_MEM_WRITE_ONLY, TEST_SIZE*sizeof(cl_int), NULL, &error);
+        test_error( error, "clCreateBuffer failed");
+
+        error = clEnqueueWriteBuffer(local_queue, local_mem_in, CL_TRUE, 0, TEST_SIZE*sizeof(cl_int), inData, 0, NULL, NULL);
+        test_error( error, "clEnqueueWriteBuffer failed");
+
+        error = clEnqueueWriteBuffer(local_queue, local_mem_out, CL_TRUE, 0, TEST_SIZE*sizeof(cl_int), outData, 0, NULL, NULL);
+        test_error( error, "clEnqueueWriteBuffer failed");
+
+        error = clSetKernelArg(local_kernel, 0, sizeof(local_mem_in), &local_mem_in);
+        test_error( error, "clSetKernelArg failed");
+
+        error = clSetKernelArg(local_kernel, 1, sizeof(local_mem_out), &local_mem_out);
+        test_error( error, "clSetKernelArg failed");
+
+        error = clEnqueueNDRangeKernel(local_queue, local_kernel, 1, NULL, global_dim, NULL, 0, NULL, &local_event);
+        test_error( error, "clEnqueueNDRangeKernel failed");
+
+        error = clWaitForEvents(1, &local_event);
+        test_error( error, "clWaitForEvents failed");
+
+        error = clGetEventInfo(local_event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(status), &status, NULL);
+        test_error( error, "clGetEventInfo failed");
+
+        if (status != CL_COMPLETE) {
+            log_error( "Kernel execution not complete: status %d.\n", status);
+            free(inData);
+            free(outData);
+            return -1;
+        }
+
+        error = clEnqueueReadBuffer(local_queue, local_mem_out, CL_TRUE, 0, TEST_SIZE*sizeof(cl_int), outData, 0, NULL, NULL);
+        test_error( error, "clEnqueueReadBuffer failed");
+
+        clReleaseEvent(local_event);
+        clReleaseMemObject(local_mem_in);
+        clReleaseMemObject(local_mem_out);
+        clReleaseKernel(local_kernel);
+        clReleaseProgram(local_program);
+        clReleaseCommandQueue(local_queue);
+        clReleaseContext(local_context);
+
+        for (j=0; j<TEST_SIZE; j++) {
+            if (outData[j] != inData[j] + 1) {
+                log_error("Results failed to validate at iteration %d. %d != %d.\n", i, outData[j], inData[j] + 1);
+                free(inData);
+                free(outData);
+                return -1;
+            }
+        }
+    }
+
+    free(inData);
+    free(outData);
+
+    return 0;
+}
+
+
+
--- a/test_conformance/compatibility/test_conformance/api/test_device_min_data_type_align_size_alignment.cpp
+++ b/test_conformance/compatibility/test_conformance/api/test_device_min_data_type_align_size_alignment.cpp
@@ -0,0 +1,60 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "testBase.h"
+#include "../../test_common/harness/testHarness.h"
+#ifndef _WIN32
+#include <unistd.h>
+#endif
+
+// Returns 1 when x is a power of two (exactly one bit set), 0 otherwise.
+// Zero is rejected explicitly: 0 & (0 - 1) is also 0, but 0 is not a power
+// of two.
+int IsAPowerOfTwo( unsigned long x )
+{
+  return x != 0 && 0 == (x & (x-1));
+}
+
+
+// Checks the device's CL_DEVICE_MEM_BASE_ADDR_ALIGN value. The reported value
+// is treated as a bit count: it must be at least 8 * sizeof(cl_long) * 16
+// when gHasLong is set (8 * sizeof(cl_int) * 16 otherwise) and must be a
+// power of two.
+//
+// context, queue and n_elems are not used by this test.
+// Returns 0 on success and -1 on the explicit failure paths. test_error is a
+// harness macro not shown here; it is assumed to leave the function when the
+// status is not CL_SUCCESS.
+int test_min_data_type_align_size_alignment(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
+{
+  // Minimum alignment in bytes: the size of a 16-element vector of the widest
+  // integer type in use.
+  cl_uint min_alignment;
+
+  if (gHasLong)
+    min_alignment = sizeof(cl_long)*16;
+  else
+    min_alignment = sizeof(cl_int)*16;
+
+  int error = 0;
+  cl_uint alignment;
+
+  error = clGetDeviceInfo(device, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof(alignment), &alignment, NULL);
+  test_error(error, "clGetDeviceInfo for CL_DEVICE_MEM_BASE_ADDR_ALIGN failed");
+  log_info("Device reported CL_DEVICE_MEM_BASE_ADDR_ALIGN = %lu bits.\n", (unsigned long)alignment);
+
+  // Verify the size is large enough (min_alignment is in bytes, hence the *8)
+  if (alignment < min_alignment*8) {
+    log_error("ERROR: alignment too small. Minimum alignment for %s16 is %lu bits, device reported %lu bits.\n",
+              (gHasLong) ? "long" : "int",
+              (unsigned long)(min_alignment*8), (unsigned long)alignment);
+    return -1;
+  }
+
+  // Verify the size is a power of two
+  if (!IsAPowerOfTwo((unsigned long)alignment)) {
+    log_error("ERROR: alignment is not a power of two.\n");
+    return -1;
+  }
+
+  return 0;
+
+}
--- a/test_conformance/compatibility/test_conformance/api/test_kernel_arg_changes.cpp
+++ b/test_conformance/compatibility/test_conformance/api/test_kernel_arg_changes.cpp
@@ -0,0 +1,141 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "testBase.h"
+
+extern "C" { extern cl_uint gRandomSeed;}
+
+// This test is designed to stress changing kernel arguments between execute calls (that are asynchronous and thus
+// potentially overlapping) to make sure each kernel gets the right arguments
+
+// Note: put a delay loop in the kernel to make sure we have time to queue the next kernel before this one finishes
+// Kernel source: after an empty counting loop, each work-item stores the
+// image width and height, each multiplied by its global id, into
+// outDimensions[tid * 2] and outDimensions[tid * 2 + 1].
+const char *inspect_image_kernel_source[] = {
+"__kernel void sample_test(read_only image2d_t src, __global int *outDimensions )\n"
+"{\n"
+"    int tid = get_global_id(0), i;\n"
+"     for( i = 0; i < 100000; i++ ); \n"
+"    outDimensions[tid * 2] = get_image_width(src) * tid;\n"
+"    outDimensions[tid * 2 + 1] = get_image_height(src) * tid;\n"
+"\n"
+"}\n" };
+
+#define NUM_TRIES    100
+#define NUM_THREADS 2048
+
+// Enqueues the same kernel NUM_TRIES times, changing both of its arguments
+// (a differently sized image and a separate result buffer) before each
+// enqueue and without waiting in between. Afterwards every result buffer is
+// read back and checked: entry j must hold j times the width and j times the
+// height of the image that was bound for that try.
+//
+// num_elements is not used by this test.
+// Returns 0 on success and -1 on the explicit failure paths. test_error and
+// PASSIVE_REQUIRE_IMAGE_SUPPORT are harness macros not shown here; test_error
+// is assumed to leave the function when the status is not CL_SUCCESS.
+int test_kernel_arg_changes(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+{
+    clProgramWrapper program;
+    clKernelWrapper kernel;
+    int error, i;
+    clMemWrapper images[ NUM_TRIES ];
+    size_t         sizes[ NUM_TRIES ][ 2 ];
+    clMemWrapper results[ NUM_TRIES ];
+    cl_image_format    imageFormat;
+    size_t maxWidth, maxHeight;
+    size_t threads[1], localThreads[1];
+    cl_int resultArray[ NUM_THREADS * 2 ];
+    char errStr[ 128 ];
+    RandomSeed seed( gRandomSeed );
+
+
+    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
+
+    // Just get any ol format to test with
+    error = get_8_bit_image_format( context, CL_MEM_OBJECT_IMAGE2D, CL_MEM_READ_WRITE, 0, &imageFormat );
+    test_error( error, "Unable to obtain suitable image format to test with!" );
+
+    // Create our testing kernel
+    error = create_single_kernel_helper( context, &program, &kernel, 1, inspect_image_kernel_source, "sample_test" );
+    test_error( error, "Unable to create testing kernel" );
+
+    // Get max dimensions for each of our images
+    error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL );
+    error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL );
+    test_error( error, "Unable to get max image dimensions for device" );
+
+    // Get the number of threads we'll be able to run
+    threads[0] = NUM_THREADS;
+    error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
+    test_error( error, "Unable to get work group size for kernel" );
+
+    // Create a variety of images and output arrays
+    for( i = 0; i < NUM_TRIES; i++ )
+    {
+        // Random size between 1 and 1/32 of the device maximum in each dimension
+        sizes[ i ][ 0 ] = genrand_int32(seed) % (maxWidth/32) + 1;
+        sizes[ i ][ 1 ] = genrand_int32(seed) % (maxHeight/32) + 1;
+
+        images[ i ] = create_image_2d( context, (cl_mem_flags)(CL_MEM_READ_ONLY),
+                                     &imageFormat, sizes[ i ][ 0], sizes[ i ][ 1 ], 0, NULL, &error );
+        if( images[i] == NULL || error != CL_SUCCESS )
+        {
+            log_error("Failed to create image %d of size %d x %d (%s).\n", i, (int)sizes[i][0], (int)sizes[i][1], IGetErrorString( error ));
+            return -1;
+        }
+        results[ i ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof( cl_int ) * threads[0] * 2, NULL, &error );
+        if( results[i] == NULL || error != CL_SUCCESS )
+        {
+            log_error("Failed to create array %d of size %d.\n", i, (int)threads[0]*2);
+            return -1;
+        }
+    }
+
+    // Start setting arguments and executing kernels
+    for( i = 0; i < NUM_TRIES; i++ )
+    {
+        // Set the arguments for this try
+        error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &images[ i ] );
+        sprintf( errStr, "Unable to set argument 0 for kernel try %d", i );
+        test_error( error, errStr );
+
+        error = clSetKernelArg( kernel, 1, sizeof( cl_mem ), &results[ i ] );
+        sprintf( errStr, "Unable to set argument 1 for kernel try %d", i );
+        test_error( error, errStr );
+
+        // Queue up execution
+        error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
+        sprintf( errStr, "Unable to execute kernel try %d", i );
+        test_error( error, errStr );
+    }
+
+    // Read the results back out, one at a time, and verify
+    for( i = 0; i < NUM_TRIES; i++ )
+    {
+        error = clEnqueueReadBuffer( queue, results[ i ], CL_TRUE, 0, sizeof( cl_int ) * threads[0] * 2, resultArray, 0, NULL, NULL );
+        sprintf( errStr, "Unable to read results for kernel try %d", i );
+        test_error( error, errStr );
+
+        // Verify. Each entry should be n * the (width/height) of image i
+        for( int j = 0; j < NUM_THREADS; j++ )
+        {
+            if( resultArray[ j * 2 + 0 ] != (int)sizes[ i ][ 0 ] * j )
+            {
+                log_error( "ERROR: Verification for kernel try %d, sample %d FAILED, expected a width of %d, got %d\n",
+                          i, j, (int)sizes[ i ][ 0 ] * j, resultArray[ j * 2 + 0 ] );
+                return -1;
+            }
+            if( resultArray[ j * 2 + 1 ] != (int)sizes[ i ][ 1 ] * j )
+            {
+                log_error( "ERROR: Verification for kernel try %d, sample %d FAILED, expected a height of %d, got %d\n",
+                          i, j, (int)sizes[ i ][ 1 ] * j, resultArray[ j * 2 + 1 ] );
+                return -1;
+            }
+        }
+    }
+
+    // If we got here, everything verified successfully
+    return 0;
+}
+
+
--- a/test_conformance/compatibility/test_conformance/api/test_kernel_arg_info.c
+++ b/test_conformance/compatibility/test_conformance/api/test_kernel_arg_info.c
--- a/test_conformance/compatibility/test_conformance/api/test_kernel_arg_multi_setup.cpp
+++ b/test_conformance/compatibility/test_conformance/api/test_kernel_arg_multi_setup.cpp
@@ -0,0 +1,277 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "testBase.h"
+#include "../../test_common/harness/conversions.h"
+
+// This test is designed to stress passing multiple vector parameters to kernels and verifying access between them all
+
+// printf-style pattern for the test kernel. The six %s placeholders are the
+// element types of src1, src2, src3, dst1, dst2 and dst3, in that order; each
+// work-item copies one element from every source to its matching destination.
+const char *multi_arg_kernel_source_pattern =
+"__kernel void sample_test(__global %s *src1, __global %s *src2, __global %s *src3, __global %s *dst1, __global %s *dst2, __global %s *dst3 )\n"
+"{\n"
+"    int tid = get_global_id(0);\n"
+"    dst1[tid] = src1[tid];\n"
+"    dst2[tid] = src2[tid];\n"
+"    dst3[tid] = src3[tid];\n"
+"}\n";
+
+extern cl_uint gRandomSeed;
+
+#define MAX_ERROR_TOLERANCE 0.0005f
+
+// Byte-compares two host buffers one vector component at a time. Each buffer
+// holds itemCount items of vecSize components, span bytes per component.
+// Logs the first mismatching component (tagged with streamIndex) and returns
+// -1; returns 0 when everything matches.
+static int verify_multi_arg_stream( const void *expected, const void *actual, size_t span,
+                                    int vecSize, int itemCount, int streamIndex )
+{
+    const char *ptr1 = (const char *)expected, *ptr2 = (const char *)actual;
+    int i, j;
+
+    for( i = 0; i < itemCount; i++ )
+    {
+        for( j = 0; j < vecSize; j++ )
+        {
+            if( memcmp( ptr1 + span * j , ptr2 + span * j, span ) != 0 )
+            {
+                log_error( "ERROR: Value did not validate for component %d of item %d of stream %d!\n", j, i, streamIndex );
+                return -1;
+            }
+        }
+        ptr1 += span * vecSize;
+        ptr2 += span * vecSize;
+    }
+    return 0;
+}
+
+// Does the actual work of test_multi_arg_set. Host allocations are stored in
+// initData / resultData instead of being freed here, so the caller can free
+// them on every exit path, including the early exits taken through
+// test_error (a harness macro not shown here, assumed to return from this
+// function when the status is not CL_SUCCESS).
+static int run_multi_arg_set( cl_context context, cl_command_queue queue,
+                              ExplicitType vec1Type, int vec1Size,
+                              ExplicitType vec2Type, int vec2Size,
+                              ExplicitType vec3Type, int vec3Size, MTdata d,
+                              void *initData[3], void *resultData[3] )
+{
+    clProgramWrapper program;
+    clKernelWrapper kernel;
+    int error, i;
+    clMemWrapper streams[ 6 ];
+    size_t threads[1], localThreads[1];
+    char programSrc[ 10248 ], vec1Name[ 64 ], vec2Name[ 64 ], vec3Name[ 64 ];
+    // Type-name suffix indexed by (vector size - 1); unused sizes map to ""
+    char sizeNames[][ 4 ] = { "", "2", "3", "4", "", "", "", "8" };
+    const char *ptr;
+
+
+    // Create the program source
+    sprintf( vec1Name, "%s%s", get_explicit_type_name( vec1Type ), sizeNames[ vec1Size - 1 ] );
+    sprintf( vec2Name, "%s%s", get_explicit_type_name( vec2Type ), sizeNames[ vec2Size - 1 ] );
+    sprintf( vec3Name, "%s%s", get_explicit_type_name( vec3Type ), sizeNames[ vec3Size - 1 ] );
+
+    // The pattern has exactly six %s placeholders: src1..src3 then dst1..dst3
+    sprintf( programSrc, multi_arg_kernel_source_pattern,
+            vec1Name, vec2Name, vec3Name, vec1Name, vec2Name, vec3Name );
+    ptr = programSrc;
+
+    // Create our testing kernel
+    error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "sample_test" );
+    test_error( error, "Unable to create testing kernel" );
+
+    // Get thread dimensions
+    threads[0] = 1024;
+    error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
+    test_error( error, "Unable to get work group size for kernel" );
+
+    // Create input streams
+    initData[ 0 ] = create_random_data( vec1Type, d, (unsigned int)threads[ 0 ] * vec1Size );
+    streams[ 0 ] = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), get_explicit_type_size( vec1Type ) * threads[0] * vec1Size, initData[ 0 ], &error );
+    test_error( error, "Unable to create testing stream" );
+
+    initData[ 1 ] = create_random_data( vec2Type, d, (unsigned int)threads[ 0 ] * vec2Size );
+    streams[ 1 ] = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), get_explicit_type_size( vec2Type ) * threads[0] * vec2Size, initData[ 1 ], &error );
+    test_error( error, "Unable to create testing stream" );
+
+    initData[ 2 ] = create_random_data( vec3Type, d, (unsigned int)threads[ 0 ] * vec3Size );
+    streams[ 2 ] = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), get_explicit_type_size( vec3Type ) * threads[0] * vec3Size, initData[ 2 ], &error );
+    test_error( error, "Unable to create testing stream" );
+
+    // Create output streams
+    streams[ 3 ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  get_explicit_type_size( vec1Type ) * threads[0] * vec1Size, NULL, &error );
+    test_error( error, "Unable to create testing stream" );
+
+    streams[ 4 ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  get_explicit_type_size( vec2Type ) * threads[0] * vec2Size, NULL, &error );
+    test_error( error, "Unable to create testing stream" );
+
+    streams[ 5 ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  get_explicit_type_size( vec3Type ) * threads[0] * vec3Size, NULL, &error );
+    test_error( error, "Unable to create testing stream" );
+
+    // Set the arguments
+    error = 0;
+    for( i = 0; i < 6; i++ )
+        error |= clSetKernelArg( kernel, i, sizeof( cl_mem ), &streams[ i ] );
+    test_error( error, "Unable to set arguments for kernel" );
+
+    // Execute!
+    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
+    test_error( error, "Unable to execute kernel" );
+
+    // Read results
+    resultData[0] = malloc( get_explicit_type_size( vec1Type ) * vec1Size * threads[0] );
+    resultData[1] = malloc( get_explicit_type_size( vec2Type ) * vec2Size * threads[0] );
+    resultData[2] = malloc( get_explicit_type_size( vec3Type ) * vec3Size * threads[0] );
+    if( resultData[0] == NULL || resultData[1] == NULL || resultData[2] == NULL )
+    {
+        log_error( "ERROR: Unable to allocate host memory for results\n" );
+        return -1;
+    }
+    error = clEnqueueReadBuffer( queue, streams[ 3 ], CL_TRUE, 0, get_explicit_type_size( vec1Type ) * vec1Size * threads[ 0 ], resultData[0], 0, NULL, NULL );
+    error |= clEnqueueReadBuffer( queue, streams[ 4 ], CL_TRUE, 0, get_explicit_type_size( vec2Type ) * vec2Size * threads[ 0 ], resultData[1], 0, NULL, NULL );
+    error |= clEnqueueReadBuffer( queue, streams[ 5 ], CL_TRUE, 0, get_explicit_type_size( vec3Type ) * vec3Size * threads[ 0 ], resultData[2], 0, NULL, NULL );
+    test_error( error, "Unable to read result stream" );
+
+    // Verify: every destination stream must be a byte-exact copy of its source
+    if( verify_multi_arg_stream( initData[ 0 ], resultData[ 0 ], get_explicit_type_size( vec1Type ), vec1Size, (int)threads[0], 0 ) != 0 )
+        return -1;
+    if( verify_multi_arg_stream( initData[ 1 ], resultData[ 1 ], get_explicit_type_size( vec2Type ), vec2Size, (int)threads[0], 1 ) != 0 )
+        return -1;
+    if( verify_multi_arg_stream( initData[ 2 ], resultData[ 2 ], get_explicit_type_size( vec3Type ), vec3Size, (int)threads[0], 2 ) != 0 )
+        return -1;
+
+    // If we got here, everything verified successfully
+    return 0;
+}
+
+// Builds a copy kernel whose three source/destination pointer pairs use the
+// given element types and vector sizes, runs it over 1024 work-items on
+// random input and checks that each destination buffer matches its source.
+//
+// vecNSize selects the type-name suffix through a lookup table that only has
+// non-empty entries for 2, 3, 4 and 8. device is not used.
+// Returns 0 on success and non-zero on failure. All host buffers are freed
+// before returning, whichever path is taken.
+int test_multi_arg_set(cl_device_id device, cl_context context, cl_command_queue queue,
+                       ExplicitType vec1Type, int vec1Size,
+                       ExplicitType vec2Type, int vec2Size,
+                       ExplicitType vec3Type, int vec3Size, MTdata d)
+{
+    void *initData[3] = { NULL, NULL, NULL };
+    void *resultData[3] = { NULL, NULL, NULL };
+    int k, result;
+
+    result = run_multi_arg_set( context, queue,
+                                vec1Type, vec1Size,
+                                vec2Type, vec2Size,
+                                vec3Type, vec3Size, d,
+                                initData, resultData );
+
+    // free( NULL ) is a no-op, so buffers that were never allocated are fine
+    for( k = 0; k < 3; k++ )
+    {
+        free( initData[ k ] );
+        free( resultData[ k ] );
+    }
+
+    return result;
+}
+
+// Runs test_multi_arg_set for every ordered triple of element types drawn
+// from { char, short, int, float }, and for each triple every ordered triple
+// of vector widths drawn from { 2, 4, 8 }. Stops at the first failing
+// configuration and returns -1; returns 0 when all of them pass.
+// num_elements is not used.
+int test_kernel_arg_multi_setup_exhaustive(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+{
+    static const ExplicitType elementTypes[] = { kChar, kShort, kInt, kFloat };
+    static const int vectorWidths[] = { 2, 4, 8 };
+    const int numTypes = (int)( sizeof( elementTypes ) / sizeof( elementTypes[ 0 ] ) );
+    const int numWidths = (int)( sizeof( vectorWidths ) / sizeof( vectorWidths[ 0 ] ) );
+    RandomSeed seed( gRandomSeed );
+    int typeCombo, widthCombo;
+
+    log_info( "\n" ); // for formatting
+
+    // Each combination index is decoded as a three-digit number in base
+    // numTypes (or numWidths), most significant digit first, which visits the
+    // combinations in the same order as three nested loops would.
+    for( typeCombo = 0; typeCombo < numTypes * numTypes * numTypes; typeCombo++ )
+    {
+        const ExplicitType firstType  = elementTypes[ typeCombo / ( numTypes * numTypes ) ];
+        const ExplicitType secondType = elementTypes[ ( typeCombo / numTypes ) % numTypes ];
+        const ExplicitType thirdType  = elementTypes[ typeCombo % numTypes ];
+
+        log_info( "\n\ttesting %s, %s, %s...", get_explicit_type_name( firstType ), get_explicit_type_name( secondType ), get_explicit_type_name( thirdType ) );
+
+        for( widthCombo = 0; widthCombo < numWidths * numWidths * numWidths; widthCombo++ )
+        {
+            const int firstWidth  = vectorWidths[ widthCombo / ( numWidths * numWidths ) ];
+            const int secondWidth = vectorWidths[ ( widthCombo / numWidths ) % numWidths ];
+            const int thirdWidth  = vectorWidths[ widthCombo % numWidths ];
+
+            // One progress dot per configuration
+            log_info(".");
+            fflush( stdout);
+
+            if( test_multi_arg_set( device, context, queue,
+                                   firstType, firstWidth,
+                                   secondType, secondWidth,
+                                   thirdType, thirdWidth, seed ) != 0 )
+                return -1;
+        }
+    }
+
+    log_info( "\n" );
+    return 0;
+}
+
+// Picks a random index in [0, numTypes). The value from get_random_float is
+// truncated to int; whether that helper can return its upper bound is not
+// visible here, so the result is clamped to keep it inside the table.
+static int random_type_index( MTdata d, int numTypes )
+{
+    int index = (int)get_random_float( 0, numTypes, d );
+    return ( index >= numTypes ) ? numTypes - 1 : index;
+}
+
+// Runs test_multi_arg_set for every ordered triple of vector widths drawn
+// from { 2, 4, 8 }, with four randomly chosen element-type triples per width
+// triple (3*3*3*4 configurations in total). Stops at the first failing
+// configuration and returns -1; returns 0 when all of them pass.
+// The incoming num_elements value is ignored and overwritten with the
+// configuration count for logging.
+int test_kernel_arg_multi_setup_random(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+{
+    // Loop through a selection of combinations
+    ExplicitType types[] = { kChar, kShort, kInt, kFloat, kNumExplicitTypes };
+    // Number of real entries in types[], excluding the trailing sentinel
+    const int numTypes = (int)( sizeof( types ) / sizeof( types[ 0 ] ) ) - 1;
+    int type1, type2, type3;
+    int size1, size2, size3;
+    RandomSeed seed( gRandomSeed );
+
+    num_elements = 3*3*3*4;
+    log_info( "Testing %d random configurations\n", num_elements );
+
+    // Loop through every combination of vector size
+    for( size1 = 2; size1 <= 8; size1 <<= 1 )
+    {
+        for( size2 = 2; size2 <= 8; size2 <<= 1 )
+        {
+            for( size3 = 2; size3 <= 8; size3 <<= 1 )
+            {
+                // Loop through 4 type combinations for each size combination
+                int n;
+                for (n=0; n<4; n++) {
+                    type1 = random_type_index( seed, numTypes );
+                    type2 = random_type_index( seed, numTypes );
+                    type3 = random_type_index( seed, numTypes );
+
+
+                    log_info( "\ttesting %s%d, %s%d, %s%d...\n",
+                             get_explicit_type_name( types[ type1 ] ), size1,
+                             get_explicit_type_name( types[ type2 ] ), size2,
+                             get_explicit_type_name( types[ type3 ] ), size3 );
+
+                    if( test_multi_arg_set( device, context, queue,
+                                           types[ type1 ], size1,
+                                           types[ type2 ], size2,
+                                           types[ type3 ], size3, seed ) )
+                        return -1;
+                }
+            }
+        }
+    }
+    return 0;
+}
+
+
+
+
--- a/test_conformance/compatibility/test_conformance/api/test_kernels.c
+++ b/test_conformance/compatibility/test_conformance/api/test_kernels.c
@@ -0,0 +1,704 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "testBase.h"
+#include "../../test_common/harness/typeWrappers.h"
+#include "../../test_common/harness/conversions.h"
+
+extern cl_uint gRandomSeed;
+
+/* OpenCL C source (one string) for kernel "sample_test": each work-item converts
+ * src[global id] from float to int and stores the result in dst[global id]. */
+const char *sample_single_test_kernel[] = {
+"__kernel void sample_test(__global float *src, __global int *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = (int)src[tid];\n"
+"\n"
+"}\n" };
+
+/* OpenCL C source (one string) for kernel "sample_test": src points to a struct
+ * holding two __global int pointers (A and B); each work-item writes
+ * A[global id] + B[global id] to dst[global id]. */
+const char *sample_struct_test_kernel[] = {
+"typedef struct {\n"
+"__global int *A;\n"
+"__global int *B;\n"
+"} input_pair_t;\n"
+"\n"
+"__kernel void sample_test(__global input_pair_t *src, __global int *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = src->A[tid] + src->B[tid];\n"
+"\n"
+"}\n" };
+
+/* OpenCL C source (one string) for kernel "sample_test": src is an array of
+ * structs with two int members (A and B); each work-item writes
+ * src[global id].A + src[global id].B to dst[global id]. */
+const char *sample_struct_array_test_kernel[] = {
+"typedef struct {\n"
+"int A;\n"
+"int B;\n"
+"} input_pair_t;\n"
+"\n"
+"__kernel void sample_test(__global input_pair_t *src, __global int *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = src[tid].A + src[tid].B;\n"
+"\n"
+"}\n" };
+
+/* OpenCL C source (one string) for kernel "sample_test": both inputs are
+ * __constant int buffers; each work-item writes src1[global id] + src2[global id]
+ * to dst[global id]. */
+const char *sample_const_test_kernel[] = {
+"__kernel void sample_test(__constant int *src1, __constant int *src2, __global int *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = src1[tid] + src2[tid];\n"
+"\n"
+"}\n" };
+
+/* OpenCL C source (one string) for kernel "sample_test": declares a program-scope
+ * __constant int addFactor = 1024; each work-item writes src1[global id] + addFactor
+ * to dst[global id]. */
+const char *sample_const_global_test_kernel[] = {
+"__constant int addFactor = 1024;\n"
+"__kernel void sample_test(__global int *src1, __global int *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = src1[tid] + addFactor;\n"
+"\n"
+"}\n" };
+
+/* OpenCL C source split across TWO strings, one kernel each:
+ *   [0] "sample_test":  dst[global id] = (int)src[global id]   (float -> int)
+ *   [1] "sample_test2": dst[global id] = (float)src[global id] (int -> float) */
+const char *sample_two_kernel_program[] = {
+"__kernel void sample_test(__global float *src, __global int *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = (int)src[tid];\n"
+"\n"
+"}\n",
+"__kernel void sample_test2(__global int *src, __global float *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = (float)src[tid];\n"
+"\n"
+"}\n" };
+
+
+
+
+/* Runs the clGetKernelInfo checks for a kernel named "sample_test" that takes two
+ * arguments and was created from `program` in `context`.
+ * Kept separate from the entry point so that every early exit below still goes
+ * through the caller's release calls.
+ * Returns 0 when all queries validate, non-zero otherwise. */
+static int verify_sample_kernel_info( cl_kernel kernel, cl_program program, cl_context context )
+{
+    int error;
+    cl_program testProgram;
+    cl_context testContext;
+    cl_char name[ 512 ];
+    cl_uint numArgs, numInstances;
+    size_t paramSize;
+
+    /* Function name: size query first (value size 0, value pointer NULL), then the value */
+    error = clGetKernelInfo( kernel, CL_KERNEL_FUNCTION_NAME, 0, NULL, &paramSize );
+    test_error( error, "Unable to get kernel function name param size" );
+    if( paramSize != strlen( "sample_test" ) + 1 )
+    {
+        log_error( "ERROR: Kernel function name param returns invalid size (expected %d, got %d)\n", (int)strlen( "sample_test" ) + 1, (int)paramSize );
+        return -1;
+    }
+
+    error = clGetKernelInfo( kernel, CL_KERNEL_FUNCTION_NAME, sizeof( name ), name, NULL );
+    test_error( error, "Unable to get kernel function name" );
+    if( strcmp( (char *)name, "sample_test" ) != 0 )
+    {
+        log_error( "ERROR: Kernel function name returned invalid value (expected sample_test, got %s)\n", (char *)name );
+        return -1;
+    }
+
+    /* Argument count */
+    error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, 0, NULL, &paramSize );
+    test_error( error, "Unable to get kernel arg count param size" );
+    if( paramSize != sizeof( numArgs ) )
+    {
+        log_error( "ERROR: Kernel arg count param returns invalid size (expected %d, got %d)\n", (int)sizeof( numArgs ), (int)paramSize );
+        return -1;
+    }
+
+    error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, sizeof( numArgs ), &numArgs, NULL );
+    test_error( error, "Unable to get kernel arg count" );
+    if( numArgs != 2 )
+    {
+        log_error( "ERROR: Kernel arg count returned invalid value (expected %d, got %d)\n", 2, numArgs );
+        return -1;
+    }
+
+    /* Reference count: only the size and the success of the query are checked */
+    error = clGetKernelInfo( kernel, CL_KERNEL_REFERENCE_COUNT, 0, NULL, &paramSize );
+    test_error( error, "Unable to get kernel reference count param size" );
+    if( paramSize != sizeof( numInstances ) )
+    {
+        log_error( "ERROR: Kernel reference count param returns invalid size (expected %d, got %d)\n", (int)sizeof( numInstances ), (int)paramSize );
+        return -1;
+    }
+
+    error = clGetKernelInfo( kernel, CL_KERNEL_REFERENCE_COUNT, sizeof( numInstances ), &numInstances, NULL );
+    test_error( error, "Unable to get kernel reference count" );
+
+    /* Owning program */
+    error = clGetKernelInfo( kernel, CL_KERNEL_PROGRAM, 0, NULL, &paramSize );
+    test_error( error, "Unable to get kernel program param size" );
+    if( paramSize != sizeof( testProgram ) )
+    {
+        log_error( "ERROR: Kernel program param returns invalid size (expected %d, got %d)\n", (int)sizeof( testProgram ), (int)paramSize );
+        return -1;
+    }
+
+    error = clGetKernelInfo( kernel, CL_KERNEL_PROGRAM, sizeof( testProgram ), &testProgram, NULL );
+    test_error( error, "Unable to get kernel program" );
+    if( testProgram != program )
+    {
+        log_error( "ERROR: Kernel program returned invalid value (expected %p, got %p)\n", program, testProgram );
+        return -1;
+    }
+
+    /* Owning context */
+    error = clGetKernelInfo( kernel, CL_KERNEL_CONTEXT, sizeof( testContext ), &testContext, NULL );
+    test_error( error, "Unable to get kernel context" );
+    if( testContext != context )
+    {
+        log_error( "ERROR: Kernel context returned invalid value (expected %p, got %p)\n", context, testContext );
+        return -1;
+    }
+
+    return 0;
+}
+
+/* Builds the single-kernel sample program and validates the values reported by
+ * clGetKernelInfo for it (function name, argument count, reference count query,
+ * program and context). deviceID, queue and num_elements are not used.
+ * The kernel and program are released on every path once they have been created.
+ * Returns 0 on success, non-zero on failure. */
+int test_get_kernel_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    cl_program program;
+    cl_kernel kernel;
+    int result;
+
+    /* Create reference */
+    if( create_single_kernel_helper( context, &program, &kernel, 1, sample_single_test_kernel, "sample_test" ) != 0 )
+    {
+        return -1;
+    }
+
+    result = verify_sample_kernel_info( kernel, program, context );
+
+    /* Release memory (also reached when a check failed) */
+    clReleaseKernel( kernel );
+    clReleaseProgram( program );
+    return result;
+}
+
+/* Enqueues `kernel` once over threads[0] work-items with the given work-group
+ * size, reads threads[0] cl_int results back from resultStream (blocking) and
+ * checks that each one equals the host-side (int) conversion of inputData.
+ * passName ("first", "second", ...) is only used in the mismatch message.
+ * Returns 0 on success, non-zero on an OpenCL error or a data mismatch. */
+static int run_local_size_pass( cl_command_queue queue, cl_kernel kernel, cl_mem resultStream,
+                                const size_t *threads, const size_t *localThreads,
+                                const cl_float *inputData, cl_int *outputData,
+                                const char *passName )
+{
+    int error;
+    size_t i;
+
+    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
+    test_error( error, "Kernel execution failed" );
+
+    error = clEnqueueReadBuffer( queue, resultStream, CL_TRUE, 0, sizeof(cl_int) * threads[0], (void *)outputData, 0, NULL, NULL );
+    test_error( error, "Unable to get result data" );
+
+    for( i = 0; i < threads[0]; i++ )
+    {
+        if( outputData[i] != (int)inputData[i] )
+        {
+            log_error( "ERROR: Data did not verify on %s pass!\n", passName );
+            return -1;
+        }
+    }
+
+    return 0;
+}
+
+/* Halves localSize (when above 1), then decrements it until it evenly divides
+ * globalSize or reaches 1. */
+static size_t shrink_local_size( size_t localSize, size_t globalSize )
+{
+    if( localSize > 1 )
+        localSize /= 2;
+    while( localSize > 1 && 0 != globalSize % localSize )
+        localSize--;
+    return localSize;
+}
+
+/* Runs the float->int sample kernel over 100 elements four times, each time with
+ * a different work-group size: the size reported by get_max_common_work_group_size,
+ * two successively shrunk sizes, and finally 1. The output is verified after each
+ * run. deviceID and num_elements are not used.
+ * Returns 0 on success, non-zero on failure. */
+int test_execute_kernel_local_sizes(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    enum { kElementCount = 100 };
+
+    int error;
+    clProgramWrapper program;
+    clKernelWrapper kernel;
+    clMemWrapper            streams[2];
+    size_t    threads[1], localThreads[1];
+    cl_float inputData[kElementCount];
+    cl_int outputData[kElementCount];
+    RandomSeed seed( gRandomSeed );
+    int i;
+
+    /* Create a kernel to test with */
+    if( create_single_kernel_helper( context, &program, &kernel, 1, sample_single_test_kernel, "sample_test" ) != 0 )
+    {
+        return -1;
+    }
+
+    /* Create some I/O streams */
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * kElementCount, NULL, &error);
+    test_error( error, "Creating test array failed" );
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * kElementCount, NULL, &error);
+    test_error( error, "Creating test array failed" );
+
+    /* Write some test data */
+    memset( outputData, 0, sizeof( outputData ) );
+
+    for (i=0; i<kElementCount; i++)
+        inputData[i] = get_random_float(-(float) 0x7fffffff, (float) 0x7fffffff, seed);
+
+    error = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, sizeof(cl_float)*kElementCount, (void *)inputData, 0, NULL, NULL);
+    test_error( error, "Unable to set testing kernel data" );
+
+    /* Set the arguments */
+    error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] );
+    test_error( error, "Unable to set kernel arguments" );
+    error = clSetKernelArg( kernel, 1, sizeof( streams[1] ), &streams[1] );
+    test_error( error, "Unable to set kernel arguments" );
+
+    /* First pass: the work-group size suggested by the harness */
+    threads[0] = (size_t)kElementCount;
+    error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
+    test_error( error, "Unable to get work group size to use" );
+
+    error = run_local_size_pass( queue, kernel, streams[1], threads, localThreads, inputData, outputData, "first" );
+    if( error != 0 )
+        return error;
+
+    /* Try again with a smaller work-group size */
+    localThreads[0] = shrink_local_size( localThreads[0], threads[0] );
+    error = run_local_size_pass( queue, kernel, streams[1], threads, localThreads, inputData, outputData, "second" );
+    if( error != 0 )
+        return error;
+
+    /* And again, smaller still */
+    localThreads[0] = shrink_local_size( localThreads[0], threads[0] );
+    error = run_local_size_pass( queue, kernel, streams[1], threads, localThreads, inputData, outputData, "third" );
+    if( error != 0 )
+        return error;
+
+    /* One more time, with a single work-item per work-group */
+    localThreads[0] = 1;
+    error = run_local_size_pass( queue, kernel, streams[1], threads, localThreads, inputData, outputData, "fourth" );
+    if( error != 0 )
+        return error;
+
+    return 0;
+}
+
+/* Sets the two arguments of the float->int sample kernel out of order (index 1
+ * before index 0), runs it over 10 elements and checks the converted output.
+ * deviceID and num_elements are not used.
+ * Returns 0 on success, non-zero on failure. */
+int test_set_kernel_arg_by_index(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    enum { kCount = 10 };
+
+    clProgramWrapper program;
+    clKernelWrapper kernel;
+    clMemWrapper    streams[2];
+    size_t    globalSize[1], groupSize[1];
+    cl_float srcValues[kCount];
+    cl_int results[kCount];
+    RandomSeed seed( gRandomSeed );
+    int error;
+
+    /* Build the kernel under test */
+    const int buildStatus = create_single_kernel_helper( context, &program, &kernel, 1, sample_single_test_kernel, "sample_test" );
+    if( buildStatus != 0 )
+        return -1;
+
+    /* Input (float) and output (int) device buffers */
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof( srcValues ), NULL, &error);
+    test_error( error, "Creating test array failed" );
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof( results ), NULL, &error);
+    test_error( error, "Creating test array failed" );
+
+    /* Host data: zeroed results, random inputs */
+    memset( results, 0, sizeof( results ) );
+
+    for( int n = 0; n < kCount; ++n )
+    {
+        srcValues[n] = get_random_float(-(float) 0x7fffffff, (float) 0x7fffffff, seed);
+    }
+
+    error = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, sizeof( srcValues ), (void *)srcValues, 0, NULL, NULL);
+    test_error( error, "Unable to set testing kernel data" );
+
+    /* Deliberately set argument 1 before argument 0 */
+    error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]);
+    test_error( error, "Unable to set indexed kernel arguments" );
+    error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]);
+    test_error( error, "Unable to set indexed kernel arguments" );
+
+    /* Launch one work-item per element */
+    globalSize[0] = (size_t)kCount;
+
+    error = get_max_common_work_group_size( context, kernel, globalSize[0], &groupSize[0] );
+    test_error( error, "Unable to get work group size to use" );
+
+    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, globalSize, groupSize, 0, NULL, NULL );
+    test_error( error, "Kernel execution failed" );
+
+    error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof( results ), (void *)results, 0, NULL, NULL );
+    test_error( error, "Unable to get result data" );
+
+    /* Each result must equal the host-side conversion of its input */
+    for( int n = 0; n < kCount; ++n )
+    {
+        if( results[n] == (int)srcValues[n] )
+            continue;
+
+        log_error( "ERROR: Data did not verify on first pass!\n" );
+        return -1;
+    }
+
+    return 0;
+}
+
+/* Passes a host struct holding two buffer handles as kernel argument 0 and an
+ * output buffer as argument 1, runs the struct sample kernel over 10 elements and
+ * checks that each output equals the sum of the two inputs.
+ * All OpenCL objects are held in wrapper objects (as in the sibling tests) so they
+ * are released on the early-return paths as well. deviceID and num_elements are
+ * not used. Returns 0 on success, non-zero on failure.
+ * NOTE(review): the kernel source declares argument 0 as a __global pointer while
+ * this test sets it from a by-value struct of cl_mem handles; that is preserved
+ * from the original -- confirm it is intended. */
+int test_set_kernel_arg_struct(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    int error;
+    clProgramWrapper program;
+    clKernelWrapper kernel;
+    clMemWrapper    streamA, streamB, outStream;
+    size_t    threads[1], localThreads[1];
+    cl_int outputData[10];
+    int i;
+    cl_int randomTestDataA[10], randomTestDataB[10];
+    MTdata  d;
+
+    struct img_pair_t
+    {
+        cl_mem streamA;
+        cl_mem streamB;
+    } image_pair;
+
+
+    /* Create a kernel to test with */
+    if( create_single_kernel_helper( context, &program, &kernel, 1, sample_struct_test_kernel, "sample_test" ) != 0 )
+    {
+        return -1;
+    }
+
+    /* Create some I/O streams */
+    d = init_genrand( gRandomSeed );
+    for( i = 0; i < 10; i++ )
+    {
+        randomTestDataA[i] = (cl_int)genrand_int32(d) & 0xffffff;    /* Keep values small and positive so the sum */
+        randomTestDataB[i] = (cl_int)genrand_int32(d) & 0xffffff;    /* checked below cannot overflow a signed int */
+    }
+    free_mtdata(d); d = NULL;
+
+    streamA = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_int) * 10, randomTestDataA, &error);
+    test_error( error, "Creating test array failed" );
+    streamB = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_int) * 10, randomTestDataB, &error);
+    test_error( error, "Creating test array failed" );
+    outStream = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * 10, NULL, &error);
+    test_error( error, "Creating test array failed" );
+
+    /* The struct only borrows the handles; the wrappers own and release them */
+    image_pair.streamA = streamA;
+    image_pair.streamB = streamB;
+
+    /* Set the arguments */
+    error = clSetKernelArg(kernel, 0, sizeof( image_pair ), &image_pair);
+    test_error( error, "Unable to set indexed kernel arguments" );
+    error = clSetKernelArg(kernel, 1, sizeof( cl_mem ), &outStream);
+    test_error( error, "Unable to set indexed kernel arguments" );
+
+    /* Test running the kernel and verifying it */
+    threads[0] = (size_t)10;
+
+    error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
+    test_error( error, "Unable to get work group size to use" );
+
+    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
+    test_error( error, "Kernel execution failed" );
+
+    error = clEnqueueReadBuffer( queue, outStream, CL_TRUE, 0, sizeof(cl_int)*10, (void *)outputData, 0, NULL, NULL );
+    test_error( error, "Unable to get result data" );
+
+    for (i=0; i<10; i++)
+    {
+        if (outputData[i] != randomTestDataA[i] + randomTestDataB[i])
+        {
+            log_error( "ERROR: Data did not verify!\n" );
+            return -1;
+        }
+    }
+
+    return 0;
+}
+
+/* Feeds two __constant int buffers and one output buffer to the constant sample
+ * kernel, runs it over 10 elements and checks that each output is the sum of the
+ * two inputs. Fails up front if the device reports a maximum constant buffer size
+ * smaller than the test data. queue is used for execution; num_elements is not used.
+ * Returns 0 on success, non-zero on failure. */
+int test_set_kernel_arg_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    enum { kCount = 10 };
+
+    clProgramWrapper program;
+    clKernelWrapper kernel;
+    clMemWrapper            streams[3];
+    size_t    globalSize[1], groupSize[1];
+    cl_int sums[kCount];
+    cl_int addendA[kCount], addendB[kCount];
+    cl_ulong constantLimit;
+    int error;
+
+    /* The test buffers must fit in the device's constant buffer limit */
+    error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( constantLimit ), &constantLimit, NULL );
+    test_error( error, "Unable to get max constant buffer size" );
+    if( constantLimit < sizeof( cl_int ) * kCount )
+    {
+        log_error( "ERROR: Unable to test constant argument to kernel: max size of constant buffer is reported as %d!\n", (int)constantLimit );
+        return -1;
+    }
+
+    /* Build the kernel under test */
+    const int buildStatus = create_single_kernel_helper( context, &program, &kernel, 1, sample_const_test_kernel, "sample_test" );
+    if( buildStatus != 0 )
+        return -1;
+
+    /* Random inputs, masked to 24 bits so they are positive and the sum cannot overflow */
+    MTdata rng = init_genrand( gRandomSeed );
+    for( int n = 0; n < kCount; ++n )
+    {
+        addendA[n] = (cl_int)genrand_int32(rng) & 0xffffff;
+        addendB[n] = (cl_int)genrand_int32(rng) & 0xffffff;
+    }
+    free_mtdata(rng);
+    rng = NULL;
+
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof( addendA ), addendA, &error);
+    test_error( error, "Creating test array failed" );
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof( addendB ), addendB, &error);
+    test_error( error, "Creating test array failed" );
+    streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof( sums ), NULL, &error);
+    test_error( error, "Creating test array failed" );
+
+    /* Bind the three buffers to arguments 0, 1 and 2 in order */
+    for( cl_uint argIndex = 0; argIndex < 3; ++argIndex )
+    {
+        error = clSetKernelArg(kernel, argIndex, sizeof( streams[argIndex] ), &streams[argIndex]);
+        test_error( error, "Unable to set indexed kernel arguments" );
+    }
+
+    /* Launch one work-item per element and read the sums back */
+    globalSize[0] = (size_t)kCount;
+
+    error = get_max_common_work_group_size( context, kernel, globalSize[0], &groupSize[0] );
+    test_error( error, "Unable to get work group size to use" );
+
+    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, globalSize, groupSize, 0, NULL, NULL );
+    test_error( error, "Kernel execution failed" );
+
+    error = clEnqueueReadBuffer( queue, streams[2], CL_TRUE, 0, sizeof( sums ), (void *)sums, 0, NULL, NULL );
+    test_error( error, "Unable to get result data" );
+
+    for( int n = 0; n < kCount; ++n )
+    {
+        const cl_int expected = addendA[n] + addendB[n];
+        if( sums[n] == expected )
+            continue;
+
+        log_error( "ERROR: Data sample %d did not verify! %d does not match %d + %d (%d)\n", n, sums[n], addendA[n], addendB[n], expected );
+        return -1;
+    }
+
+    return 0;
+}
+
+/* Uploads an array of 10 {A, B} integer pairs, runs the struct-array sample kernel
+ * over it and checks that each output equals A + B for the matching pair.
+ * deviceID and num_elements are not used.
+ * Returns 0 on success, non-zero on failure. */
+int test_set_kernel_arg_struct_array(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    int error;
+    clProgramWrapper program;
+    clKernelWrapper kernel;
+    clMemWrapper            streams[2];
+    size_t    threads[1], localThreads[1];
+    cl_int outputData[10];
+    int i;
+    MTdata d;
+
+    /* Host mirror of the kernel's input_pair_t (two ints) */
+    typedef struct img_pair_type
+    {
+        cl_int A;
+        cl_int B;
+    } image_pair_t;
+
+    image_pair_t image_pair[ 10 ];
+
+
+    /* Create a kernel to test with */
+    if( create_single_kernel_helper( context, &program, &kernel, 1, sample_struct_array_test_kernel, "sample_test" ) != 0 )
+    {
+        return -1;
+    }
+
+    /* Create some I/O streams */
+    d = init_genrand( gRandomSeed );
+    for( i = 0; i < 10; i++ )
+    {
+        /* Initialize BOTH members; values are masked so they are positive and
+           the A + B sum verified below cannot overflow a signed int */
+        image_pair[i].A = (cl_int)genrand_int32(d) & 0xffffff;
+        image_pair[i].B = (cl_int)genrand_int32(d) & 0xffffff;
+    }
+    free_mtdata(d); d = NULL;
+
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(image_pair_t) * 10, (void *)image_pair, &error);
+    test_error( error, "Creating test array failed" );
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * 10, NULL, &error);
+    test_error( error, "Creating test array failed" );
+
+    /* Set the arguments */
+    error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]);
+    test_error( error, "Unable to set indexed kernel arguments" );
+    error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]);
+    test_error( error, "Unable to set indexed kernel arguments" );
+
+    /* Test running the kernel and verifying it */
+    threads[0] = (size_t)10;
+
+    error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
+    test_error( error, "Unable to get work group size to use" );
+
+    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
+    test_error( error, "Kernel execution failed" );
+
+    error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*10, (void *)outputData, 0, NULL, NULL );
+    test_error( error, "Unable to get result data" );
+
+    for (i=0; i<10; i++)
+    {
+        if (outputData[i] != image_pair[i].A + image_pair[i].B)
+        {
+            log_error( "ERROR: Data did not verify!\n" );
+            return -1;
+        }
+    }
+
+    return 0;
+}
+
+/* Builds the two-kernel sample program for deviceID and checks that
+ * clCreateKernelsInProgram reports exactly two kernels and can create them.
+ * context supplies the program's context; queue and num_elements are not used.
+ * The program is handed to a wrapper object once created so it is released on
+ * the early-return paths too. Returns 0 on success, non-zero on failure. */
+int test_create_kernels_in_program(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    int error;
+    cl_program rawProgram;
+    clProgramWrapper program;
+    cl_kernel  kernel[2] = { NULL, NULL };
+    cl_uint kernelCount = 0;
+
+    /* Create a test program */
+    rawProgram = clCreateProgramWithSource( context, 2, sample_two_kernel_program, NULL, &error);
+    if( rawProgram == NULL || error != CL_SUCCESS )
+    {
+        log_error( "ERROR: Unable to create test program!\n" );
+        return -1;
+    }
+    /* From here on the wrapper owns the program handle */
+    program = rawProgram;
+
+    /* Build */
+    error = clBuildProgram( rawProgram, 1, &deviceID, NULL, NULL, NULL );
+    test_error( error, "Unable to build test program" );
+
+    /* Try getting the kernel count */
+    error = clCreateKernelsInProgram( rawProgram, 0, NULL, &kernelCount );
+    test_error( error, "Unable to get kernel count for built program" );
+    if( kernelCount != 2 )
+    {
+        log_error( "ERROR: Returned kernel count from clCreateKernelsInProgram is incorrect! (got %u, expected 2)\n", kernelCount );
+        return -1;
+    }
+
+    /* Try actually getting the kernels */
+    error = clCreateKernelsInProgram( rawProgram, 2, kernel, NULL );
+    test_error( error, "Unable to get kernels for built program" );
+    clReleaseKernel( kernel[0] );
+    clReleaseKernel( kernel[1] );
+
+    return 0;
+}
+
+/* Runs the sample kernel that adds a program-scope __constant value (1024) to
+ * each input element, over 10 elements, and checks every output against
+ * input + 1024. deviceID and num_elements are not used.
+ * Returns 0 on success, non-zero on failure. */
+int test_kernel_global_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    enum { kCount = 10 };
+
+    clProgramWrapper program;
+    clKernelWrapper kernel;
+    clMemWrapper            streams[2];
+    size_t    globalSize[1], groupSize[1];
+    cl_int results[kCount];
+    cl_int inputs[kCount];
+    int error;
+
+    /* Build the kernel under test */
+    const int buildStatus = create_single_kernel_helper( context, &program, &kernel, 1, sample_const_global_test_kernel, "sample_test" );
+    if( buildStatus != 0 )
+        return -1;
+
+    /* Random inputs, masked to 16 bits so they are positive and small and
+       adding 1024 during verification cannot overflow */
+    MTdata rng = init_genrand( gRandomSeed );
+    for( int n = 0; n < kCount; ++n )
+        inputs[n] = (cl_int)genrand_int32(rng) & 0xffff;
+    free_mtdata(rng);
+    rng = NULL;
+
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof( inputs ), inputs, &error);
+    test_error( error, "Creating test array failed" );
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof( results ), NULL, &error);
+    test_error( error, "Creating test array failed" );
+
+    /* Bind input and output buffers to arguments 0 and 1 in order */
+    for( cl_uint argIndex = 0; argIndex < 2; ++argIndex )
+    {
+        error = clSetKernelArg(kernel, argIndex, sizeof( streams[argIndex] ), &streams[argIndex]);
+        test_error( error, "Unable to set indexed kernel arguments" );
+    }
+
+    /* Launch one work-item per element and read the results back */
+    globalSize[0] = (size_t)kCount;
+
+    error = get_max_common_work_group_size( context, kernel, globalSize[0], &groupSize[0] );
+    test_error( error, "Unable to get work group size to use" );
+
+    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, globalSize, groupSize, 0, NULL, NULL );
+    test_error( error, "Kernel execution failed" );
+
+    error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof( results ), (void *)results, 0, NULL, NULL );
+    test_error( error, "Unable to get result data" );
+
+    for( int n = 0; n < kCount; ++n )
+    {
+        const cl_int expected = inputs[n] + 1024;
+        if( results[n] == expected )
+            continue;
+
+        log_error( "ERROR: Data sample %d did not verify! %d does not match %d + 1024 (%d)\n", n, results[n], inputs[n], expected );
+        return -1;
+    }
+
+    return 0;
+}
+
+
+
--- a/test_conformance/compatibility/test_conformance/api/test_mem_object_info.cpp
+++ b/test_conformance/compatibility/test_conformance/api/test_mem_object_info.cpp
@@ -0,0 +1,750 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "testBase.h"
+#include "../../test_common/harness/typeWrappers.h"
+#include "../../test_common/harness/testHarness.h"
+
+extern cl_uint gRandomSeed;
+
+
+// Queries `paramName` on `mem` with clGetMemObjectInfo into `val`, then checks
+// the returned value against `expected` and the returned size against
+// sizeof( val ). On a value or size mismatch it logs an error and returns -1
+// from the *enclosing* function. A failing query is handed to test_error
+// (harness macro; presumably it also returns from the enclosing function --
+// confirm against the harness definition).
+//
+// Requirements at the expansion site:
+//   - an `error` variable and a `size_t size` variable must be in scope;
+//   - `val` must be an lvalue of the queried type;
+//   - `name` and `type` must be string literals, since they are concatenated
+//     into the format string. `type` is the printf conversion used for both
+//     printed values and `cast` is the type both are converted to first, so
+//     the variadic arguments always agree with the conversion.
+//
+// The body is wrapped in braces so the macro acts as one statement. Callers
+// invoke it without a trailing semicolon.
+#define TEST_MEM_OBJECT_PARAM( mem, paramName, val, expected, name, type, cast )    \
+{   \
+    error = clGetMemObjectInfo( (mem), (paramName), sizeof( val ), &(val), &size );   \
+    test_error( error, "Unable to get mem object " name );  \
+    if( (val) != (expected) )   \
+    {   \
+        log_error( "ERROR: Mem object " name " did not validate! (expected " type ", got " type " from %s:%d)\n",   \
+                  (cast)(expected), (cast)(val), __FILE__, __LINE__ );   \
+        return -1;  \
+    }   \
+    if( size != sizeof( val ) ) \
+    {   \
+        log_error( "ERROR: Returned size of mem object " name " does not validate! (expected %d, got %d from %s:%d)\n", \
+                  (int)sizeof( val ), (int)size , __FILE__, __LINE__ );   \
+        return -1;  \
+    }   \
+}
+
+// Callback registered through clSetMemObjectDestructorCallback.
+// `data` is the user pointer given at registration time; the callers in this
+// file pass the malloc'ed host allocation backing the mem object, which is
+// released here. The mem object handle itself is not needed.
+static void CL_CALLBACK mem_obj_destructor_callback( cl_mem memObject, void * data )
+{
+    (void)memObject;
+    free( data );
+}
+
+// Draws a random, non-zero image dimension in the range [1, mod - 1] from the
+// generator referenced by `d`, redrawing until the remainder is non-zero.
+//
+// For mod < 2 no non-zero remainder exists (mod == 0 would divide by zero and
+// mod == 1 would never terminate), so the smallest valid dimension, 1, is
+// returned without consuming any random numbers.
+static unsigned int
+get_image_dim(MTdata *d, unsigned int mod)
+{
+    if (mod < 2)
+    {
+        return 1;
+    }
+
+    unsigned int val;
+
+    do
+    {
+        val = (unsigned int)genrand_int32(*d) % mod;
+    } while (val == 0);
+
+    return val;
+}
+
+
+// Exercises clGetMemObjectInfo on buffers and on sub-buffers created from them.
+//
+// For every flag combination in bufferFlags a buffer of 4 * addressAlign bytes
+// is created and CL_MEM_TYPE, CL_MEM_FLAGS, CL_MEM_SIZE, CL_MEM_CONTEXT,
+// CL_MEM_ASSOCIATED_MEMOBJECT and CL_MEM_OFFSET are validated (plus
+// CL_MEM_HOST_PTR for CL_MEM_USE_HOST_PTR buffers). CL_MEM_MAP_COUNT and
+// CL_MEM_REFERENCE_COUNT are only checked for the returned size. Then, for
+// every compatible entry of subBufferFlags, a sub-buffer covering the second
+// addressAlign-sized quarter of the buffer is created and the same queries are
+// validated against the values it should inherit from its parent.
+//
+// deviceID    - device whose base address alignment sizes the buffers.
+// context     - context the buffers are created in.
+// ignoreQueue - unused.
+// num_elements- unused.
+//
+// Returns CL_SUCCESS when everything validates, -1 when a validation fails; a
+// failing CL call is reported through test_error (harness macro, presumably
+// returning from this function -- confirm against the harness definition).
+int test_get_buffer_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements )
+{
+    int error;
+    size_t size;
+    void * buffer = NULL;
+
+    clMemWrapper bufferObject;
+    clMemWrapper subBufferObject;
+
+    cl_mem_flags bufferFlags[] = {
+        CL_MEM_READ_WRITE,
+        CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
+        CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
+        CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
+        CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
+        CL_MEM_READ_ONLY,
+        CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
+        CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
+        CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
+        CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
+        CL_MEM_WRITE_ONLY,
+        CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
+        CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
+        CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
+        CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
+        CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE,
+        CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
+        CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
+        CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
+        CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
+        CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY,
+        CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
+        CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
+        CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
+        CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
+        CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY,
+        CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
+        CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
+        CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
+        CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
+        CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE,
+        CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
+        CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
+        CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
+        CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
+        CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY,
+        CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
+        CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
+        CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
+        CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
+        CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY,
+        CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
+        CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
+        CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
+        CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
+        CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE,
+        CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
+        CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
+        CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
+        CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
+        CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY,
+        CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
+        CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
+        CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
+        CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
+        CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY,
+        CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
+        CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
+        CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
+        CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
+    };
+
+    cl_mem_flags subBufferFlags[] = {
+        CL_MEM_READ_WRITE,
+        CL_MEM_READ_ONLY,
+        CL_MEM_WRITE_ONLY,
+        0,
+        CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE,
+        CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY,
+        CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY,
+        CL_MEM_HOST_READ_ONLY | 0,
+        CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE,
+        CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY,
+        CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY,
+        CL_MEM_HOST_WRITE_ONLY | 0,
+        CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE,
+        CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY,
+        CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY,
+        CL_MEM_HOST_NO_ACCESS | 0,
+    };
+
+
+    // Get the address alignment, so we can make sure the sub-buffer test later works properly.
+    cl_uint addressAlignBits;
+    error = clGetDeviceInfo( deviceID, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof(addressAlignBits), &addressAlignBits, NULL );
+    // Without this check a failed query would leave addressAlignBits uninitialized.
+    test_error( error, "Unable to get device base address alignment" );
+
+    // The device reports the alignment in bits; work in bytes and never go below 128.
+    size_t addressAlign = addressAlignBits/8;
+    if ( addressAlign < 128 )
+    {
+        addressAlign = 128;
+    }
+
+    for ( unsigned int i = 0; i < sizeof(bufferFlags) / sizeof(cl_mem_flags); ++i )
+    {
+        if ( bufferFlags[ i ] & CL_MEM_USE_HOST_PTR )
+        {
+            // Create a buffer object to test against.
+            buffer = malloc( addressAlign * 4 );
+            bufferObject = clCreateBuffer( context, bufferFlags[ i ], addressAlign * 4, buffer, &error );
+            if ( error )
+            {
+                free( buffer );
+                test_error( error, "Unable to create buffer (CL_MEM_USE_HOST_PTR) to test with" );
+            }
+
+            // Make sure buffer is cleaned up appropriately if we encounter an error in the rest of the calls.
+            error = clSetMemObjectDestructorCallback( bufferObject, mem_obj_destructor_callback, buffer );
+            test_error( error, "Unable to set mem object destructor callback" );
+
+            // With CL_MEM_USE_HOST_PTR the object must report exactly the pointer we supplied.
+            void * ptr;
+            TEST_MEM_OBJECT_PARAM( bufferObject, CL_MEM_HOST_PTR, ptr, buffer, "host pointer", "%p", void * )
+        }
+        else if ( (bufferFlags[ i ] & CL_MEM_ALLOC_HOST_PTR) && (bufferFlags[ i ] & CL_MEM_COPY_HOST_PTR) )
+        {
+            // Create a buffer object to test against.
+            buffer = malloc( addressAlign * 4 );
+            bufferObject = clCreateBuffer( context, bufferFlags[ i ], addressAlign * 4, buffer, &error );
+            if ( error )
+            {
+                free( buffer );
+                test_error( error, "Unable to create buffer (CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR) to test with" );
+            }
+
+            // Make sure buffer is cleaned up appropriately if we encounter an error in the rest of the calls.
+            error = clSetMemObjectDestructorCallback( bufferObject, mem_obj_destructor_callback, buffer );
+            test_error( error, "Unable to set mem object destructor callback" );
+        }
+        else if ( bufferFlags[ i ] & CL_MEM_ALLOC_HOST_PTR )
+        {
+            // Create a buffer object to test against.
+            bufferObject = clCreateBuffer( context, bufferFlags[ i ], addressAlign * 4, NULL, &error );
+            test_error( error, "Unable to create buffer (CL_MEM_ALLOC_HOST_PTR) to test with" );
+        }
+        else if ( bufferFlags[ i ] & CL_MEM_COPY_HOST_PTR )
+        {
+            // Create a buffer object to test against.
+            buffer = malloc( addressAlign * 4 );
+            bufferObject = clCreateBuffer( context, bufferFlags[ i ], addressAlign * 4, buffer, &error );
+            if ( error )
+            {
+                free( buffer );
+                test_error( error, "Unable to create buffer (CL_MEM_COPY_HOST_PTR) to test with" );
+            }
+
+            // Make sure buffer is cleaned up appropriately if we encounter an error in the rest of the calls.
+            error = clSetMemObjectDestructorCallback( bufferObject, mem_obj_destructor_callback, buffer );
+            test_error( error, "Unable to set mem object destructor callback" );
+        }
+        else
+        {
+            // Create a buffer object to test against.
+            bufferObject = clCreateBuffer( context, bufferFlags[ i ], addressAlign * 4, NULL, &error );
+            test_error( error, "Unable to create buffer to test with" );
+        }
+
+        // Perform buffer object queries.
+        cl_mem_object_type type;
+        TEST_MEM_OBJECT_PARAM( bufferObject, CL_MEM_TYPE, type, CL_MEM_OBJECT_BUFFER, "type", "%d", int )
+
+        cl_mem_flags flags;
+        TEST_MEM_OBJECT_PARAM( bufferObject, CL_MEM_FLAGS, flags, (unsigned int)bufferFlags[ i ], "flags", "%d", unsigned int )
+
+        size_t sz;
+        TEST_MEM_OBJECT_PARAM( bufferObject, CL_MEM_SIZE, sz, (size_t)( addressAlign * 4 ), "size", "%ld", size_t )
+
+        // Map and reference counts are only checked for the size of the returned data, not for their values.
+        cl_uint mapCount;
+        error = clGetMemObjectInfo( bufferObject, CL_MEM_MAP_COUNT, sizeof( mapCount ), &mapCount, &size );
+        test_error( error, "Unable to get mem object map count" );
+        if( size != sizeof( mapCount ) )
+        {
+            log_error( "ERROR: Returned size of mem object map count does not validate! (expected %d, got %d from %s:%d)\n",
+                      (int)sizeof( mapCount ), (int)size, __FILE__, __LINE__ );
+            return -1;
+        }
+
+        cl_uint refCount;
+        error = clGetMemObjectInfo( bufferObject, CL_MEM_REFERENCE_COUNT, sizeof( refCount ), &refCount, &size );
+        test_error( error, "Unable to get mem object reference count" );
+        if( size != sizeof( refCount ) )
+        {
+            log_error( "ERROR: Returned size of mem object reference count does not validate! (expected %d, got %d from %s:%d)\n",
+                      (int)sizeof( refCount ), (int)size, __FILE__, __LINE__ );
+            return -1;
+        }
+
+        cl_context otherCtx;
+        TEST_MEM_OBJECT_PARAM( bufferObject, CL_MEM_CONTEXT, otherCtx, context, "context", "%p", cl_context )
+
+        // A top-level buffer has no parent object and starts at offset 0.
+        cl_mem origObj;
+        TEST_MEM_OBJECT_PARAM( bufferObject, CL_MEM_ASSOCIATED_MEMOBJECT, origObj, (void *)NULL, "associated mem object", "%p", void * )
+
+        size_t offset;
+        TEST_MEM_OBJECT_PARAM( bufferObject, CL_MEM_OFFSET, offset, 0L, "offset", "%ld", size_t )
+
+        // The sub-buffer covers the second addressAlign-sized quarter of the parent buffer.
+        cl_buffer_region region;
+        region.origin = addressAlign;
+        region.size = addressAlign;
+
+        // Loop over possible sub-buffer objects to create.
+        for ( unsigned int j = 0; j < sizeof(subBufferFlags) / sizeof(cl_mem_flags); ++j )
+        {
+            if ( subBufferFlags[ j ] & CL_MEM_READ_WRITE )
+            {
+                if ( !(bufferFlags[ i ] & CL_MEM_READ_WRITE) )
+                    continue; // Buffer must be read_write for sub-buffer to be read_write.
+            }
+            if ( subBufferFlags[ j ] & CL_MEM_READ_ONLY )
+            {
+                if ( !(bufferFlags[ i ] & CL_MEM_READ_WRITE) && !(bufferFlags[ i ] & CL_MEM_READ_ONLY) )
+                    continue; // Buffer must be read_write or read_only for sub-buffer to be read_only
+            }
+            if ( subBufferFlags[ j ] & CL_MEM_WRITE_ONLY )
+            {
+                if ( !(bufferFlags[ i ] & CL_MEM_READ_WRITE) && !(bufferFlags[ i ] & CL_MEM_WRITE_ONLY) )
+                    continue; // Buffer must be read_write or write_only for sub-buffer to be write_only
+            }
+            if ( subBufferFlags[ j ] & CL_MEM_HOST_READ_ONLY )
+            {
+                if ( (bufferFlags[ i ] & CL_MEM_HOST_NO_ACCESS) || (bufferFlags[ i ] & CL_MEM_HOST_WRITE_ONLY) )
+                    continue; // Buffer must be host all access or host read_only for sub-buffer to be host read_only
+            }
+            if ( subBufferFlags[ j ] & CL_MEM_HOST_WRITE_ONLY )
+            {
+                if ( (bufferFlags[ i ] & CL_MEM_HOST_NO_ACCESS) || (bufferFlags[ i ] & CL_MEM_HOST_READ_ONLY) )
+                    continue; // Buffer must be host all access or host write_only for sub-buffer to be host write_only
+            }
+
+            subBufferObject = clCreateSubBuffer( bufferObject, subBufferFlags[ j ], CL_BUFFER_CREATE_TYPE_REGION, &region, &error );
+            test_error( error, "Unable to create sub-buffer to test against" );
+
+            // Perform sub-buffer object queries. The query variables declared for the
+            // parent buffer above are reused here instead of being shadowed.
+            TEST_MEM_OBJECT_PARAM( subBufferObject, CL_MEM_TYPE, type, CL_MEM_OBJECT_BUFFER, "type", "%d", int )
+
+            // Build the flags the sub-buffer is expected to report: its own flags, plus
+            // the parent's device-access flags when none were given, the parent's
+            // host-pointer flags always, and the parent's host-access flags when none
+            // were given.
+            cl_mem_flags inheritedFlags = subBufferFlags[ j ];
+            if ( (subBufferFlags[ j ] & (CL_MEM_READ_WRITE | CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY)) == 0 )
+            {
+              inheritedFlags |= bufferFlags[ i ] & (CL_MEM_READ_WRITE | CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY);
+            }
+            inheritedFlags |= bufferFlags[ i ] & (CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR);
+            if ( (subBufferFlags[ j ] & (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS)) == 0)
+            {
+              inheritedFlags |= bufferFlags[ i ] & (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS);
+            }
+            TEST_MEM_OBJECT_PARAM( subBufferObject, CL_MEM_FLAGS, flags, (unsigned int)inheritedFlags, "flags", "%d", unsigned int )
+
+            TEST_MEM_OBJECT_PARAM( subBufferObject, CL_MEM_SIZE, sz, (size_t)( addressAlign ), "size", "%ld", size_t )
+
+            if ( bufferFlags[ i ] & CL_MEM_USE_HOST_PTR )
+            {
+                // The sub-buffer's host pointer must be the parent's pointer advanced by the region origin.
+                void * ptr;
+                void * offsetInBuffer = (char *)buffer + addressAlign;
+
+                TEST_MEM_OBJECT_PARAM( subBufferObject, CL_MEM_HOST_PTR, ptr, offsetInBuffer, "host pointer", "%p", void * )
+            }
+
+            error = clGetMemObjectInfo( subBufferObject, CL_MEM_MAP_COUNT, sizeof( mapCount ), &mapCount, &size );
+            test_error( error, "Unable to get mem object map count" );
+            if( size != sizeof( mapCount ) )
+            {
+                log_error( "ERROR: Returned size of mem object map count does not validate! (expected %d, got %d from %s:%d)\n",
+                          (int)sizeof( mapCount ), (int)size, __FILE__, __LINE__ );
+                return -1;
+            }
+
+            error = clGetMemObjectInfo( subBufferObject, CL_MEM_REFERENCE_COUNT, sizeof( refCount ), &refCount, &size );
+            test_error( error, "Unable to get mem object reference count" );
+            if( size != sizeof( refCount ) )
+            {
+                log_error( "ERROR: Returned size of mem object reference count does not validate! (expected %d, got %d from %s:%d)\n",
+                          (int)sizeof( refCount ), (int)size, __FILE__, __LINE__ );
+                return -1;
+            }
+
+            TEST_MEM_OBJECT_PARAM( subBufferObject, CL_MEM_CONTEXT, otherCtx, context, "context", "%p", cl_context )
+
+            TEST_MEM_OBJECT_PARAM( subBufferObject, CL_MEM_ASSOCIATED_MEMOBJECT, origObj, (cl_mem)bufferObject, "associated mem object", "%p", void * )
+
+            TEST_MEM_OBJECT_PARAM( subBufferObject, CL_MEM_OFFSET, offset, (size_t)( addressAlign ), "offset", "%ld", size_t )
+
+            // Release explicitly and clear the wrapper before the next iteration reuses it.
+            clReleaseMemObject( subBufferObject );
+            subBufferObject = NULL;
+
+        }
+
+        clReleaseMemObject( bufferObject );
+        bufferObject = NULL;
+    }
+
+    return CL_SUCCESS;
+}
+
+
+// Validates the generic clGetMemObjectInfo queries on an already created image.
+//
+// image       - pointer to the image object to query.
+// objectFlags - flags the image was created with; CL_MEM_FLAGS must match them.
+// imageInfo   - descriptor used to create the image; only image_type is read
+//               here (CL_MEM_TYPE must match it).
+// imageFormat - unused by this function.
+// pixelSize   - unused by this function.
+// context     - context the image was created in; CL_MEM_CONTEXT must match it.
+//
+// CL_MEM_SIZE is only required to be queryable, and CL_MEM_MAP_COUNT /
+// CL_MEM_REFERENCE_COUNT are only checked for the size of the returned data.
+// CL_MEM_OFFSET must be 0.
+//
+// Returns CL_SUCCESS when everything validates, -1 when a validation fails; a
+// failing CL call is reported through test_error (harness macro, presumably
+// returning from this function -- confirm against the harness definition).
+int test_get_imageObject_info( cl_mem * image, cl_mem_flags objectFlags, cl_image_desc *imageInfo, cl_image_format *imageFormat, size_t pixelSize, cl_context context )
+{
+    // `error` and `size` are written by TEST_MEM_OBJECT_PARAM as well as by the direct queries below.
+    int error;
+    size_t size;
+
+    cl_mem_object_type type;
+    TEST_MEM_OBJECT_PARAM( *image, CL_MEM_TYPE, type, imageInfo->image_type, "type", "%d", int )
+
+    cl_mem_flags flags;
+    TEST_MEM_OBJECT_PARAM( *image, CL_MEM_FLAGS, flags, (unsigned int)objectFlags, "flags", "%d", unsigned int )
+
+    // The size returned is not constrained by the spec, so only the query itself is checked.
+    size_t sz;
+    error = clGetMemObjectInfo( *image, CL_MEM_SIZE, sizeof( sz ), &sz, NULL );
+    test_error( error, "Unable to get mem size" );
+
+    cl_uint mapCount;
+    error = clGetMemObjectInfo( *image, CL_MEM_MAP_COUNT, sizeof( mapCount ), &mapCount, &size );
+    test_error( error, "Unable to get mem object map count" );
+    if( size != sizeof( mapCount ) )
+    {
+        log_error( "ERROR: Returned size of mem object map count does not validate! (expected %d, got %d from %s:%d)\n",
+                  (int)sizeof( mapCount ), (int)size, __FILE__, __LINE__ );
+        return -1;
+    }
+
+    cl_uint refCount;
+    error = clGetMemObjectInfo( *image, CL_MEM_REFERENCE_COUNT, sizeof( refCount ), &refCount, &size );
+    test_error( error, "Unable to get mem object reference count" );
+    if( size != sizeof( refCount ) )
+    {
+        log_error( "ERROR: Returned size of mem object reference count does not validate! (expected %d, got %d from %s:%d)\n",
+                  (int)sizeof( refCount ), (int)size, __FILE__, __LINE__ );
+        return -1;
+    }
+
+    cl_context otherCtx;
+    TEST_MEM_OBJECT_PARAM( *image, CL_MEM_CONTEXT, otherCtx, context, "context", "%p", cl_context )
+
+    size_t offset;
+    TEST_MEM_OBJECT_PARAM( *image, CL_MEM_OFFSET, offset, 0L, "offset", "%ld", size_t )
+
+    return CL_SUCCESS;
+}
+
+
+int test_get_image_info( cl_device_id deviceID, cl_context context, cl_mem_object_type type )
+{
+    int error;
+    size_t size;
+    void * image = NULL;
+
+    cl_mem imageObject;
+    cl_image_desc imageInfo;
+
+    cl_mem_flags imageFlags[] = {
+        CL_MEM_READ_WRITE,
+        CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
+        CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
+        CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
+        CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
+        CL_MEM_READ_ONLY,
+        CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
+        CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
+        CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
+        CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
+        CL_MEM_WRITE_ONLY,
+        CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
+        CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
+        CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
+        CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
+        CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE,
+        CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
+        CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
+        CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
+        CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
+        CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY,
+        CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
+        CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
+        CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
+        CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
+        CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY,
+        CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
+        CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
+        CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
+        CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
+        CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE,
+        CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
+        CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
+        CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
+        CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
+        CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY,
+        CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
+        CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
+        CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
+        CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
+        CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY,
+        CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
+        CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
+        CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
+        CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
+        CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE,
+        CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
+        CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
+        CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
+        CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
+        CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY,
+        CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
+        CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
+        CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
+        CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
+        CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY,
+        CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
+        CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
+        CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
+        CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
+    };
+    MTdata d;
+
+    PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID )
+
+    cl_image_format imageFormat;
+    size_t pixelSize = 4;
+
+    imageFormat.image_channel_order = CL_RGBA;
+    imageFormat.image_channel_data_type = CL_UNORM_INT8;
+
+    imageInfo.image_width = imageInfo.image_height = imageInfo.image_depth = 1;
+    imageInfo.image_array_size = 0;
+    imageInfo.num_mip_levels = imageInfo.num_samples = 0;
+    imageInfo.buffer = NULL;
+
+    d = init_genrand( gRandomSeed );
+
+    for ( unsigned int i = 0; i < sizeof(imageFlags) / sizeof(cl_mem_flags); ++i )
+    {
+        imageInfo.image_row_pitch = 0;
+        imageInfo.image_slice_pitch = 0;
+
+        switch (type)
+        {
+            case CL_MEM_OBJECT_IMAGE1D:
+                imageInfo.image_width = get_image_dim(&d, 1023);
+                imageInfo.image_type = CL_MEM_OBJECT_IMAGE1D;
+                break;
+
+            case CL_MEM_OBJECT_IMAGE2D:
+                imageInfo.image_width = get_image_dim(&d, 1023);
+                imageInfo.image_height = get_image_dim(&d, 1023);
+                imageInfo.image_type = CL_MEM_OBJECT_IMAGE2D;
+                break;
+
+            case CL_MEM_OBJECT_IMAGE3D:
+                imageInfo.image_width = get_image_dim(&d, 127);
+                imageInfo.image_height = get_image_dim(&d, 127);
+                imageInfo.image_depth = get_image_dim(&d, 127);
+                imageInfo.image_type = CL_MEM_OBJECT_IMAGE3D;
+                break;
+
+            case CL_MEM_OBJECT_IMAGE1D_ARRAY:
+                imageInfo.image_width = get_image_dim(&d, 1023);
+                imageInfo.image_array_size = get_image_dim(&d, 1023);
+                imageInfo.image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY;
+                break;
+
+            case CL_MEM_OBJECT_IMAGE2D_ARRAY:
+                imageInfo.image_width = get_image_dim(&d, 255);
+                imageInfo.image_height = get_image_dim(&d, 255);
+                imageInfo.image_array_size = get_image_dim(&d, 255);
+                imageInfo.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY;
+                break;
+        }
+
+        if ( imageFlags[i] & CL_MEM_USE_HOST_PTR )
+        {
+            // Create an image object to test against.
+            image = malloc( imageInfo.image_width * imageInfo.image_height * imageInfo.image_depth * pixelSize *
+                           ((imageInfo.image_array_size == 0) ? 1 : imageInfo.image_array_size) );
+            imageObject = clCreateImage( context, imageFlags[i], &imageFormat, &imageInfo, image, &error );
+            if ( error )
+            {
+                free( image );
+                test_error( error, "Unable to create image with (CL_MEM_USE_HOST_PTR) to test with" );
+            }
+
+            // Make sure image is cleaned up appropriately if we encounter an error in the rest of the calls.
+            error = clSetMemObjectDestructorCallback( imageObject, mem_obj_destructor_callback, image );
+            test_error( error, "Unable to set mem object destructor callback" );
+
+            void * ptr;
+            TEST_MEM_OBJECT_PARAM( imageObject, CL_MEM_HOST_PTR, ptr, image, "host pointer", "%p", void * )
+            int ret = test_get_imageObject_info( &imageObject, imageFlags[i], &imageInfo, &imageFormat, pixelSize, context );
+            if (ret)
+                return ret;
+
+            // release image object
+            clReleaseMemObject(imageObject);
+
+            // Try again with non-zero rowPitch.
+            imageInfo.image_row_pitch = imageInfo.image_width * pixelSize;
+            switch (type)
+            {
+                case CL_MEM_OBJECT_IMAGE1D_ARRAY:
+                case CL_MEM_OBJECT_IMAGE2D_ARRAY:
+                case CL_MEM_OBJECT_IMAGE3D:
+                    imageInfo.image_slice_pitch = imageInfo.image_row_pitch * imageInfo.image_height;
+                    break;
+            }
+
+            image = malloc( imageInfo.image_width * imageInfo.image_height * imageInfo.image_depth * pixelSize *
+                           ((imageInfo.image_array_size == 0) ? 1 : imageInfo.image_array_size) );
+            imageObject = clCreateImage( context, imageFlags[i], &imageFormat, &imageInfo, image, &error );
+            if ( error )
+            {
+                free( image );
+                test_error( error, "Unable to create image2d (CL_MEM_USE_HOST_PTR) to test with" );
+            }
+
+            // Make sure image2d is cleaned up appropriately if we encounter an error in the rest of the calls.
+            error = clSetMemObjectDestructorCallback( imageObject, mem_obj_destructor_callback, image );
+            test_error( error, "Unable to set mem object destructor callback" );
+
+            TEST_MEM_OBJECT_PARAM( imageObject, CL_MEM_HOST_PTR, ptr, image, "host pointer", "%p", void * )
+            ret = test_get_imageObject_info( &imageObject, imageFlags[i], &imageInfo, &imageFormat, pixelSize, context );
+            if (ret)
+                return ret;
+
+        }
+        else if ( (imageFlags[i] & CL_MEM_ALLOC_HOST_PTR) && (imageFlags[i] & CL_MEM_COPY_HOST_PTR) )
+        {
+            // Create an image object to test against.
+            image = malloc( imageInfo.image_width * imageInfo.image_height * imageInfo.image_depth * pixelSize *
+                           ((imageInfo.image_array_size == 0) ? 1 : imageInfo.image_array_size) );
+            imageObject = clCreateImage( context, imageFlags[i], &imageFormat, &imageInfo, image, &error );
+            if ( error )
+            {
+                free( image );
+                test_error( error, "Unable to create image with (CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR) to test with" );
+            }
+
+            // Make sure image is cleaned up appropriately if we encounter an error in the rest of the calls.
+            error = clSetMemObjectDestructorCallback( imageObject, mem_obj_destructor_callback, image );
+            test_error( error, "Unable to set mem object destructor callback" );
+            int ret = test_get_imageObject_info( &imageObject, imageFlags[ i ], &imageInfo, &imageFormat, pixelSize, context );
+            if (ret)
+                return ret;
+
+            // release image object
+            clReleaseMemObject(imageObject);
+
+            // Try again with non-zero rowPitch.
+            imageInfo.image_row_pitch = imageInfo.image_width * pixelSize;
+            switch (type)
+            {
+                case CL_MEM_OBJECT_IMAGE1D_ARRAY:
+                case CL_MEM_OBJECT_IMAGE2D_ARRAY:
+                case CL_MEM_OBJECT_IMAGE3D:
+                    imageInfo.image_slice_pitch = imageInfo.image_row_pitch * imageInfo.image_height;
+                    break;
+            }
+
+            image = malloc( imageInfo.image_width * imageInfo.image_height * imageInfo.image_depth * pixelSize *
+                           ((imageInfo.image_array_size == 0) ? 1 : imageInfo.image_array_size) );
+            imageObject = clCreateImage( context, imageFlags[i], &imageFormat, &imageInfo, image, &error );
+            if ( error )
+            {
+                free( image );
+                test_error( error, "Unable to create image with (CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR) to test with" );
+            }
+
+            // Make sure image is cleaned up appropriately if we encounter an error in the rest of the calls.
+            error = clSetMemObjectDestructorCallback( imageObject, mem_obj_destructor_callback, image );
+            test_error( error, "Unable to set mem object destructor callback" );
+            ret = test_get_imageObject_info( &imageObject, imageFlags[i], &imageInfo, &imageFormat, pixelSize, context );
+            if (ret)
+                return ret;
+
+        }
+        else if ( imageFlags[i] & CL_MEM_ALLOC_HOST_PTR )
+        {
+            // Create an image object to test against.
+            imageObject = clCreateImage( context, imageFlags[i], &imageFormat, &imageInfo, NULL, &error );
+            test_error( error, "Unable to create image with (CL_MEM_ALLOC_HOST_PTR) to test with" );
+            int ret = test_get_imageObject_info( &imageObject, imageFlags[i], &imageInfo, &imageFormat, pixelSize, context );
+            if (ret)
+                return ret;
+
+        }
+        else if ( imageFlags[i] & CL_MEM_COPY_HOST_PTR )
+        {
+            // Create an image object to test against.
+            image = malloc( imageInfo.image_width * imageInfo.image_height * imageInfo.image_depth * pixelSize *
+                           ((imageInfo.image_array_size == 0) ? 1 : imageInfo.image_array_size) );
+            imageObject = clCreateImage( context, imageFlags[i], &imageFormat, &imageInfo, image, &error );
+            if ( error )
+            {
+                free( image );
+                test_error( error, "Unable to create image with (CL_MEM_COPY_HOST_PTR) to test with" );
+            }
+
+            // Make sure image is cleaned up appropriately if we encounter an error in the rest of the calls.
+            error = clSetMemObjectDestructorCallback( imageObject, mem_obj_destructor_callback, image );
+            test_error( error, "Unable to set mem object destructor callback" );
+            int ret = test_get_imageObject_info( &imageObject, imageFlags[i], &imageInfo, &imageFormat, pixelSize, context );
+            if (ret)
+                return ret;
+
+            clReleaseMemObject(imageObject);
+
+            // Try again with non-zero rowPitch.
+            imageInfo.image_row_pitch = imageInfo.image_width * pixelSize;
+            switch (type)
+            {
+                case CL_MEM_OBJECT_IMAGE1D_ARRAY:
+                case CL_MEM_OBJECT_IMAGE2D_ARRAY:
+                case CL_MEM_OBJECT_IMAGE3D:
+                    imageInfo.image_slice_pitch = imageInfo.image_row_pitch * imageInfo.image_height;
+                    break;
+            }
+
+            image = malloc( imageInfo.image_width * imageInfo.image_height * imageInfo.image_depth * pixelSize *
+                           ((imageInfo.image_array_size == 0) ? 1 : imageInfo.image_array_size) );
+            imageObject = clCreateImage( context, imageFlags[i], &imageFormat, &imageInfo, image, &error );
+            if ( error )
+            {
+                free( image );
+                test_error( error, "Unable to create image with (CL_MEM_COPY_HOST_PTR) to test with" );
+            }
+
+            // Make sure image is cleaned up appropriately if we encounter an error in the rest of the calls.
+            error = clSetMemObjectDestructorCallback( imageObject, mem_obj_destructor_callback, image );
+            test_error( error, "Unable to set mem object destructor callback" );
+            ret = test_get_imageObject_info( &imageObject, imageFlags[i], &imageInfo, &imageFormat, pixelSize, context );
+            if (ret)
+                return ret;
+
+        }
+        else
+        {
+            // Create an image object to test against.
+            imageObject = clCreateImage( context, imageFlags[i], &imageFormat, &imageInfo, NULL, &error );
+            test_error( error, "Unable to create image to test with" );
+            int ret = test_get_imageObject_info( &imageObject, imageFlags[i], &imageInfo, &imageFormat, pixelSize, context );
+            if (ret)
+                return ret;
+
+        }
+
+        clReleaseMemObject( imageObject );
+    }
+
+    return CL_SUCCESS;
+}
+
+// 2D variant: runs test_get_image_info with CL_MEM_OBJECT_IMAGE2D; ignoreQueue and num_elements are unused.
+int test_get_image2d_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements )
+{
+    return test_get_image_info(deviceID, context, CL_MEM_OBJECT_IMAGE2D);
+}
+// 3D variant: runs test_get_image_info with CL_MEM_OBJECT_IMAGE3D; ignoreQueue and num_elements are unused.
+int test_get_image3d_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements )
+{
+    return test_get_image_info(deviceID, context, CL_MEM_OBJECT_IMAGE3D);
+}
+// 1D variant: runs test_get_image_info with CL_MEM_OBJECT_IMAGE1D; ignoreQueue and num_elements are unused.
+int test_get_image1d_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements )
+{
+    return test_get_image_info(deviceID, context, CL_MEM_OBJECT_IMAGE1D);
+}
+// 1D-array variant: runs test_get_image_info with CL_MEM_OBJECT_IMAGE1D_ARRAY; ignoreQueue and num_elements are unused.
+int test_get_image1d_array_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements )
+{
+    return test_get_image_info(deviceID, context, CL_MEM_OBJECT_IMAGE1D_ARRAY);
+}
+// 2D-array variant: runs test_get_image_info with CL_MEM_OBJECT_IMAGE2D_ARRAY; ignoreQueue and num_elements are unused.
+int test_get_image2d_array_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements )
+{
+    return test_get_image_info(deviceID, context, CL_MEM_OBJECT_IMAGE2D_ARRAY);
+}
+
+
--- a/test_conformance/compatibility/test_conformance/api/test_mem_objects.cpp
+++ b/test_conformance/compatibility/test_conformance/api/test_mem_objects.cpp
@@ -0,0 +1,108 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "testBase.h"
+
+static volatile cl_int sDestructorIndex;
+
+// Destructor callback: bumps sDestructorIndex and stores the new value in the int that userData points to.
+void CL_CALLBACK mem_destructor_callback( cl_mem memObject, void * userData )
+{
+    // ordering of callbacks is guaranteed, meaning we don't need to do atomic operation here
+    sDestructorIndex = sDestructorIndex + 1;
+    *static_cast<int *>( userData ) = sDestructorIndex;
+}
+
+#ifndef ABS
+// Every use of the argument is parenthesized so compound expressions such as ABS( a - b ) expand correctly.
+#define ABS( x ) ( ( ( x ) < 0 ) ? -( x ) : ( x ) )
+#endif
+
+// Registers three destructor callbacks on memObject, releases it, and checks that they ran in reverse
+// registration order. memObject is set to NULL after the manual release. Returns 0 when the order is
+// right and -1 otherwise; test_error is expected to return early if an OpenCL call fails.
+int test_mem_object_destructor_callback_single( clMemWrapper &memObject )
+{
+    cl_int error;
+    // One slot per callback; mem_destructor_callback writes its invocation rank into its slot
+    volatile int callbackOrders[ 3 ] = { 0, 0, 0 };
+    sDestructorIndex = 0;
+
+    // Set up the callbacks
+    for( int i = 0; i < 3; i++ )
+    {
+        error = clSetMemObjectDestructorCallback( memObject, mem_destructor_callback, (void*) &callbackOrders[ i ] );
+        test_error( error, "Unable to set destructor callback" );
+    }
+
+    // Now release the buffer, which SHOULD call the callbacks
+    error = clReleaseMemObject( memObject );
+    test_error( error, "Unable to release test buffer" );
+
+    // Note: since we manually released the mem wrapper, we need to set it to NULL to prevent a double-release
+    memObject = NULL;
+
+    int numErrors = 0;
+    for( int i = 0; i < 3; i++ )
+    {
+        // Spin waiting for the release to finish.  If you don't call the mem_destructor_callback, you will not
+        // pass the test.  bugzilla 6316
+        while( 0 == callbackOrders[i] )
+        {}
+        // Callbacks run in reverse registration order, so callback i must report rank 3-i
+        const int actualOrder = ABS( callbackOrders[ i ] );
+        const int expectedOrder = 3 - i;
+        if( actualOrder != expectedOrder )
+        {
+            log_error( "\tERROR: Callback %d was called in the wrong order! (Was called order %d, should have been order %d)\n",
+                      i+1, actualOrder, expectedOrder );
+            numErrors++;
+        }
+    }
+
+    return ( numErrors > 0 ) ? -1 : 0;
+}
+
+// Runs the destructor-callback ordering check on a 1024-byte buffer and then, only when
+// checkForImageSupport() returns 0 for the device, on a 16x16 CL_RGBA / CL_SIGNED_INT8 2D image.
+// Returns 0 when every exercised object passes and -1 as soon as one of them fails.
+int test_mem_object_destructor_callback(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    cl_int error;
+    clMemWrapper testBuffer, testImage;
+
+    testBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE, 1024, NULL, &error );
+    test_error( error, "Unable to create testing buffer" );
+
+    if( test_mem_object_destructor_callback_single( testBuffer ) != 0 )
+    {
+        log_error( "ERROR: Destructor callbacks for buffer object FAILED\n" );
+        return -1;
+    }
+
+    // Nothing more to check when checkForImageSupport() reports a non-zero value
+    if( checkForImageSupport( deviceID ) != 0 )
+        return 0;
+
+    // Image object
+    cl_image_format imageFormat = { CL_RGBA, CL_SIGNED_INT8 };
+    testImage = create_image_2d( context, CL_MEM_READ_ONLY, &imageFormat, 16, 16, 0, NULL, &error );
+    test_error( error, "Unable to create testing image" );
+
+    if( test_mem_object_destructor_callback_single( testImage ) == 0 )
+        return 0;
+    log_error( "ERROR: Destructor callbacks for image object FAILED\n" );
+    return -1;
+}
--- a/test_conformance/compatibility/test_conformance/api/test_native_kernel.cpp
+++ b/test_conformance/compatibility/test_conformance/api/test_native_kernel.cpp
@@ -0,0 +1,121 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "testBase.h"
+
+#ifndef _WIN32
+#include <unistd.h>
+#endif
+
+#include "../../test_common/harness/conversions.h"
+
+extern cl_uint gRandomSeed;
+
+// Native kernel body: copies params->count cl_int values from params->source into params->dest.
+static void CL_CALLBACK test_native_kernel_fn( void *userData )
+{
+    struct arg_struct {
+        cl_int * source;
+        cl_int * dest;
+        cl_int count;
+    } *params = (arg_struct *)userData;
+    cl_int idx = 0;
+    while( idx < params->count ) { params->dest[ idx ] = params->source[ idx ]; ++idx; }
+}
+
+// Fills inBuffer with random data, copies it between two CL buffers with a native kernel and compares the
+// read-back outBuffer against it. Returns 0 on success, 1 on a mismatch; test_error is expected to return early on API failures.
+static int run_native_kernel_copy( cl_context context, cl_command_queue queue, int n_elems, cl_int *inBuffer, cl_int *outBuffer )
+{
+    int error;
+    RandomSeed seed( gRandomSeed );
+    clMemWrapper streams[ 2 ];
+    clEventWrapper finishEvent;
+    struct arg_struct
+    {
+        cl_mem inputStream;
+        cl_mem outputStream;
+        cl_int count;
+    } args;
+    // Create some input values
+    generate_random_data( kInt, n_elems, seed, inBuffer );
+
+    // Create I/O streams
+    streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, n_elems * sizeof(cl_int), inBuffer, &error );
+    test_error( error, "Unable to create I/O stream" );
+    streams[ 1 ] = clCreateBuffer( context, 0, n_elems * sizeof(cl_int), NULL, &error );
+    test_error( error, "Unable to create I/O stream" );
+
+    // Set up the arrays to call with
+    args.inputStream = streams[ 0 ];
+    args.outputStream = streams[ 1 ];
+    args.count = n_elems;
+    void * memLocs[ 2 ] = { &args.inputStream, &args.outputStream };
+
+    // Run the kernel
+    error = clEnqueueNativeKernel( queue, test_native_kernel_fn, &args, sizeof( args ),
+                                   2, &streams[ 0 ], (const void **)memLocs, 0, NULL, &finishEvent );
+    test_error( error, "Unable to queue native kernel" );
+
+    // Finish and wait for the kernel to complete
+    error = clFinish( queue );
+    test_error(error, "clFinish failed");
+    error = clWaitForEvents( 1, &finishEvent );
+    test_error(error, "clWaitForEvents failed");
+
+    // Now read the results and verify
+    error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, n_elems * sizeof(cl_int), outBuffer, 0, NULL, NULL );
+    test_error( error, "Unable to read results" );
+    for( int i = 0; i < n_elems; i++ )
+    {
+        if( inBuffer[ i ] != outBuffer[ i ] )
+        {
+            log_error( "ERROR: Data sample %d for native kernel did not validate (expected %d, got %d)\n",
+                      i, (int)inBuffer[ i ], (int)outBuffer[ i ] );
+            return 1;
+        }
+    }
+    return 0;
+}
+
+// Native-kernel test entry point: passes trivially (returns 0) when the device does not report
+// CL_EXEC_NATIVE_KERNEL, otherwise returns the result of run_native_kernel_copy on n_elems values.
+int test_native_kernel(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
+{
+    // Check if we support native kernels; capabilities is only read once the query has succeeded
+    cl_device_exec_capabilities capabilities = 0;
+    int error = clGetDeviceInfo(device, CL_DEVICE_EXECUTION_CAPABILITIES, sizeof(capabilities), &capabilities, NULL);
+    test_error( error, "Unable to query CL_DEVICE_EXECUTION_CAPABILITIES" );
+    if (!(capabilities & CL_EXEC_NATIVE_KERNEL)) {
+        log_info("Device does not support CL_EXEC_NATIVE_KERNEL.\n");
+        return 0;
+    }
+#if !(defined (_WIN32) && defined (_MSC_VER))
+    cl_int inBuffer[ n_elems ], outBuffer[ n_elems ];
+    return run_native_kernel_copy( context, queue, n_elems, inBuffer, outBuffer );
+#else
+    cl_int* inBuffer  = (cl_int *)_malloca( n_elems * sizeof(cl_int) );
+    cl_int* outBuffer = (cl_int *)_malloca( n_elems * sizeof(cl_int) );
+    int result = -1;
+    if( inBuffer != NULL && outBuffer != NULL )
+        result = run_native_kernel_copy( context, queue, n_elems, inBuffer, outBuffer );
+    else
+        log_error( "ERROR: Unable to allocate host buffers for native kernel test\n" );
+    // _malloca may allocate from the heap, so each block is handed back with _freea on every path
+    if( inBuffer != NULL ) _freea( inBuffer );
+    if( outBuffer != NULL ) _freea( outBuffer );
+    return result;
+#endif
+}
--- a/test_conformance/compatibility/test_conformance/api/test_null_buffer_arg.c
+++ b/test_conformance/compatibility/test_conformance/api/test_null_buffer_arg.c
@@ -0,0 +1,162 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include <stdio.h>
+#if defined(__APPLE__)
+#include <OpenCL/opencl.h>
+#include <OpenCL/cl_platform.h>
+#else
+#include <CL/opencl.h>
+#include <CL/cl_platform.h>
+#endif
+#include "procs.h"
+
+
+enum { SUCCESS, FAILURE };
+typedef enum { NON_NULL_PATH, ADDROF_NULL_PATH, NULL_PATH } test_type;
+
+#define NITEMS 4096
+
+/* stores the value of the src pointer, cast to long, into each element of the
+ * output array, to allow testing that the kernel actually _gets_ the NULL value */
+const char *kernel_string =
+"kernel void test_kernel(global float *src, global long *dst)\n"
+"{\n"
+"    uint tid = get_global_id(0);\n"
+"    dst[tid] = (long)src;\n"
+"}\n";
+
+/*
+ * The guts of the test:
+ * call setKernelArgs with a regular buffer, &NULL, or NULL depending on
+ * the value of 'type', run the kernel and check the src value it stored.
+ * Returns SUCCESS on a match, a nonzero value otherwise.
+ */
+static int test_setargs_and_execution(cl_command_queue queue, cl_kernel kernel,
+    cl_mem test_buf, cl_mem result_buf, test_type type)
+{
+    unsigned int test_success = SUCCESS;
+    unsigned int i;
+    cl_int status;
+    const char *typestr;
+
+    if (type == NON_NULL_PATH) {
+        status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &test_buf);
+        typestr = "non-NULL";
+    } else if (type == ADDROF_NULL_PATH) {
+        test_buf = NULL;
+        status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &test_buf);
+        typestr = "&NULL";
+    } else if (type == NULL_PATH) {
+        status = clSetKernelArg(kernel, 0, sizeof(cl_mem), NULL);
+        typestr = "NULL";
+    } else {
+        // Reject unknown values so status and typestr are never read uninitialized.
+        log_error("Unknown test type: %d\n", (int)type);
+        return FAILURE;
+    }
+
+    log_info("Testing setKernelArgs with %s buffer.\n", typestr);
+    if (status != CL_SUCCESS) {
+        log_error("clSetKernelArg failed with status: %d\n", status);
+        return FAILURE; // no point in continuing *this* test
+    }
+
+    size_t global = NITEMS;
+    status = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global,
+        NULL, 0, NULL, NULL);
+    test_error(status, "NDRangeKernel failed.");
+
+    cl_long* host_result = (cl_long*)malloc(NITEMS*sizeof(cl_long));
+    if (host_result == NULL) {
+        log_error("Unable to allocate host result buffer.\n");
+        return FAILURE;
+    }
+    status = clEnqueueReadBuffer(queue, result_buf, CL_TRUE, 0,
+        sizeof(cl_long)*NITEMS, host_result, 0, NULL, NULL);
+    if (status != CL_SUCCESS) {
+        free(host_result); // test_error is expected to return here; free the host copy first
+        test_error(status, "ReadBuffer failed.");
+    }
+
+    // in the non-null case, we expect NONZERO values; in both NULL cases, all zeros:
+    const int expect_null = (type != NON_NULL_PATH);
+    for (i=0; i<NITEMS; i++) {
+        if ((host_result[i] == 0) != expect_null) {
+            log_error("failure: item %u in the result buffer was unexpectedly %s.\n", i, expect_null ? "non-NULL" : "NULL");
+            test_success = FAILURE; break;
+        }
+    }
+    free(host_result);
+
+    if (test_success == SUCCESS) {
+        log_info("\t%s ok.\n", typestr);
+    }
+
+    return test_success;
+}
+
+/* Creates the program, kernel and both buffers through the out-parameters (so the caller can release
+ * them on every path) and runs the argument-path checks. Returns 0 only when every path passes. */
+static int run_null_buffer_arg_paths(cl_context context, cl_command_queue queue,
+    cl_program *program, cl_kernel *kernel, cl_mem *dev_src, cl_mem *dev_dst)
+{
+    unsigned int test_success = 0;
+    cl_int status;
+    // prep kernel:
+    *program = clCreateProgramWithSource(context, 1, &kernel_string, NULL, &status);
+    test_error(status, "CreateProgramWithSource failed.");
+    status = clBuildProgram(*program, 0, NULL, NULL, NULL, NULL);
+    test_error(status, "BuildProgram failed.");
+    *kernel = clCreateKernel(*program, "test_kernel", &status);
+    test_error(status, "CreateKernel failed.");
+    // buffer creation is checked too, so a failed allocation is reported instead of being used as NULL
+    *dev_src = clCreateBuffer(context, CL_MEM_READ_ONLY, NITEMS*sizeof(cl_float), NULL, &status);
+    test_error(status, "CreateBuffer for the source failed.");
+    *dev_dst = clCreateBuffer(context, CL_MEM_WRITE_ONLY, NITEMS*sizeof(cl_long), NULL, &status);
+    test_error(status, "CreateBuffer for the destination failed.");
+
+    // set the destination buffer normally:
+    status = clSetKernelArg(*kernel, 1, sizeof(cl_mem), dev_dst);
+    test_error(status, "SetKernelArg failed.");
+
+    // we test three cases:
+    // - typical case, used everyday: non-null buffer
+    // - the case of src as &NULL (the spec-compliance test)
+    // - the case of src as NULL (the backwards-compatibility test, Apple only)
+    test_success  = test_setargs_and_execution(queue, *kernel, *dev_src, *dev_dst, NON_NULL_PATH);
+    test_success |= test_setargs_and_execution(queue, *kernel, *dev_src, *dev_dst, ADDROF_NULL_PATH);
+#ifdef __APPLE__
+    test_success |= test_setargs_and_execution(queue, *kernel, *dev_src, *dev_dst, NULL_PATH);
+#endif
+    return test_success;
+}
+
+/* Test entry point: runs the NULL-buffer-argument checks and releases every OpenCL object that was
+ * created, whether the checks passed, failed, or stopped early. device and num_elements are unused. */
+int test_null_buffer_arg(cl_device_id device, cl_context context,
+    cl_command_queue queue, int num_elements)
+{
+    cl_program program = NULL;
+    cl_kernel kernel = NULL;
+    cl_mem dev_src = NULL, dev_dst = NULL;
+    int result = run_null_buffer_arg_paths(context, queue, &program, &kernel, &dev_src, &dev_dst);
+    // clean up whatever was created, on success and failure alike:
+    if (dev_src) clReleaseMemObject(dev_src);
+    if (dev_dst) clReleaseMemObject(dev_dst);
+    if (kernel) clReleaseKernel(kernel);
+    if (program) clReleaseProgram(program);
+    return result;
+}
--- a/test_conformance/compatibility/test_conformance/api/test_platform.cpp
+++ b/test_conformance/compatibility/test_conformance/api/test_platform.cpp
@@ -0,0 +1,289 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "testBase.h"
+
+#include <string.h>
+
+#define EXTENSION_NAME_BUF_SIZE 4096
+
+#define PRINT_EXTENSION_INFO 0
+
+// Checks that every tracked extension listed in the CL_PLATFORM_EXTENSIONS string of the device's
+// platform also appears in the device's CL_DEVICE_EXTENSIONS string.
+// Only deviceID is used; context, queue and num_elements are ignored.
+// Returns 0 on success and -1 when a query fails or the device lacks such an extension.
+int test_platform_extensions(cl_device_id deviceID, cl_context context,
+                 cl_command_queue queue, int num_elements)
+{
+    // NULL-terminated list of the extensions this test looks for
+    const char * extensions[] = {
+    "cl_khr_byte_addressable_store",
+//    "cl_APPLE_SetMemObjectDestructor",
+    "cl_khr_global_int32_base_atomics",
+    "cl_khr_global_int32_extended_atomics",
+    "cl_khr_local_int32_base_atomics",
+    "cl_khr_local_int32_extended_atomics",
+    "cl_khr_int64_base_atomics",
+    "cl_khr_int64_extended_atomics",
+// need to put in entries for various atomics
+    "cl_khr_3d_image_writes",
+    "cl_khr_fp16",
+    "cl_khr_fp64",
+    NULL
+    };
+
+    // One flag per slot of extensions[], in the same order; raised when the platform lists that extension
+    bool extensionsSupported[] = {
+    false, // "cl_khr_byte_addressable_store"
+    false, // "cl_khr_global_int32_base_atomics"
+    false, // "cl_khr_global_int32_extended_atomics"
+    false, // "cl_khr_local_int32_base_atomics"
+    false, // "cl_khr_local_int32_extended_atomics"
+    false, // "cl_khr_int64_base_atomics"
+    false, // "cl_khr_int64_extended_atomics"
+    false, // "cl_khr_3d_image_writes"
+    false, // "cl_khr_fp16"
+    false, // "cl_khr_fp64"
+    false  // NULL
+    };
+
+    int extensionIndex;
+
+    cl_platform_id platformID;
+    cl_int err;
+
+    char platform_extensions[EXTENSION_NAME_BUF_SIZE];
+    char device_extensions[EXTENSION_NAME_BUF_SIZE];
+
+    // The device is checked against the platform that contains it, so the first step is to
+    // ask the device for its platform: CL_DEVICE_PLATFORM yields a cl_platform_id.
+    err = clGetDeviceInfo(deviceID,
+              CL_DEVICE_PLATFORM,
+              sizeof(cl_platform_id),
+              (void *)(&platformID),
+              NULL);
+    if(CL_SUCCESS != err)
+    {
+        vlog_error("test_platform_extensions : could not get platformID from device\n");
+        return -1;
+    }
+
+    // Fetch the extension string advertised by the platform
+    err = clGetPlatformInfo(platformID,
+                CL_PLATFORM_EXTENSIONS,
+                sizeof(platform_extensions),
+                (void *)platform_extensions,
+                NULL);
+    if(CL_SUCCESS != err)
+    {
+        vlog_error("test_platform_extensions : could not get extension string from platform\n");
+        return -1;
+    }
+
+#if PRINT_EXTENSION_INFO
+    log_info("Platform extensions include \"%s\"\n\n", platform_extensions);
+#endif
+
+    // Raise the flag of every tracked extension that appears in the platform string
+    extensionIndex = 0;
+    while(extensions[extensionIndex] != NULL)
+    {
+        const char *wanted = extensions[extensionIndex];
+
+        if(strstr(platform_extensions, wanted) != NULL)
+        {
+            // we found it
+#if PRINT_EXTENSION_INFO
+            log_info("Found \"%s\" in platform extensions\n",
+            wanted);
+#endif
+            extensionsSupported[extensionIndex] = true;
+        }
+        ++extensionIndex;
+    }
+
+    // Fetch the extension string advertised by the device
+    // (this can be turned into a "loop over all devices in this platform")
+    err = clGetDeviceInfo(deviceID,
+              CL_DEVICE_EXTENSIONS,
+              sizeof(device_extensions),
+              (void *)device_extensions,
+              NULL);
+    if(CL_SUCCESS != err)
+    {
+        vlog_error("test_platform_extensions : could not get extension string from device\n");
+        return -1;
+    }
+
+#if PRINT_EXTENSION_INFO
+    log_info("Device extensions include \"%s\"\n\n", device_extensions);
+#endif
+
+    // Every extension flagged for the platform must show up in the device string as well
+    for(extensionIndex=0; extensions[extensionIndex] != NULL; ++extensionIndex)
+    {
+        if(!extensionsSupported[extensionIndex])
+            continue; // the platform does not list it, so nothing is required of the device
+
+        if(strstr(device_extensions, extensions[extensionIndex]) != NULL)
+            continue; // listed by both
+
+        vlog_error("Platform supports extension \"%s\" but device does not\n",
+               extensions[extensionIndex]);
+        return -1;
+    }
+
+    return 0;
+}
+
+// Queries one string-valued platform parameter into buffer (8192 bytes), logs it as "<name>: <value>" and
+// bumps *total_errors when the reported size is not strlen+1. Returns the clGetPlatformInfo status.
+static int check_platform_string(cl_platform_id platform, cl_platform_info param, const char *param_name,
+                                 char *buffer, int *total_errors) {
+  size_t size = 0;
+
+  memset(buffer, 0, 8192);
+  int err = clGetPlatformInfo(platform, param, 8192, buffer, &size);
+  if (err != CL_SUCCESS)
+    return err;
+
+  log_info("\t%s: %s\n", param_name, buffer);
+  if (strlen(buffer)+1 != size) {
+    // size_t values are cast to long so the arguments really match the %ld conversions
+    log_error("Returned string length %ld does not equal reported one %ld.\n", (long)(strlen(buffer)+1), (long)size);
+    (*total_errors)++;
+  }
+  return CL_SUCCESS;
+}
+
+// Checks one device: the CL_DEVICE_PLATFORM query size, the device name, and that a context created with a
+// CL_CONTEXT_PLATFORM property reports it back. The context is released on every path after creation.
+static int check_platform_device(cl_platform_id platform, cl_device_id device, int d,
+                                 char *string_returned, int *total_errors) {
+  size_t returned_size;
+  cl_platform_id returned_platform;
+  cl_context context;
+  cl_context_properties properties[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0 };
+  int err;
+
+  err = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &returned_platform, &returned_size);
+  test_error(err, "clGetDeviceInfo failed for CL_DEVICE_PLATFORM\n");
+  if (returned_size != sizeof(cl_platform_id)) {
+    log_error("Reported return size (%ld) does not match expected size (%ld).\n", (long)returned_size, (long)sizeof(cl_platform_id));
+    (*total_errors)++;
+  }
+
+  memset(string_returned, 0, 8192);
+  err = clGetDeviceInfo(device, CL_DEVICE_NAME, 8192, string_returned, NULL);
+  test_error(err, "clGetDeviceInfo failed for CL_DEVICE_NAME\n");
+  log_info("\t\tPlatform for device %d (%s) is %p.\n", d, string_returned, returned_platform);
+
+  log_info("\t\t\tTesting clCreateContext for the platform/device...\n");
+  // Try creating a context for the platform
+  context = clCreateContext(properties, 1, &device, NULL, NULL, &err);
+  test_error(err, "\t\tclCreateContext failed for device with platform properties\n");
+
+  memset(properties, 0, sizeof(cl_context_properties)*3);
+  err = clGetContextInfo(context, CL_CONTEXT_PROPERTIES, sizeof(cl_context_properties)*3, properties, &returned_size);
+  if (err != CL_SUCCESS) {
+    clReleaseContext(context); // test_error is expected to return here; release the context first
+    test_error(err, "clGetContextInfo for CL_CONTEXT_PROPERTIES failed");
+  }
+  if (returned_size != sizeof(cl_context_properties)*3) {
+    log_error("Invalid size returned from clGetContextInfo for CL_CONTEXT_PROPERTIES. Got %ld, expected %ld.\n",
+              (long)returned_size, (long)(sizeof(cl_context_properties)*3));
+    (*total_errors)++;
+  }
+
+  if (properties[0] != (cl_context_properties)CL_CONTEXT_PLATFORM || properties[1] != (cl_context_properties)platform) {
+    log_error("Wrong properties returned. Expected: [%p %p], got [%p %p]\n",
+              (void*)CL_CONTEXT_PLATFORM, platform, (void*)properties[0], (void*)properties[1]);
+    (*total_errors)++;
+  }
+
+  err = clReleaseContext(context);
+  test_error(err, "clReleaseContext failed");
+  return CL_SUCCESS;
+}
+
+// Enumerates up to 16 platforms, validates their string queries and checks every device of each platform.
+// Returns the number of validation errors found, -1 when there is no platform or no memory, or the status
+// of the first OpenCL call that fails. deviceID, context, queue and num_elements are not used.
+int test_get_platform_ids(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
+  cl_platform_id platforms[16];
+  cl_uint num_platforms;
+  char string_returned[8192]; // on the stack, so no early return can leak it
+  int total_errors = 0;
+  int err = CL_SUCCESS;
+
+  err = clGetPlatformIDs(16, platforms, &num_platforms);
+  test_error(err, "clGetPlatformIDs failed");
+
+  if (num_platforms <= 16) {
+    // Try with NULL
+    err = clGetPlatformIDs(num_platforms, platforms, NULL);
+    test_error(err, "clGetPlatformIDs failed with NULL for return size");
+  }
+
+  if (num_platforms < 1) {
+    log_error("Found 0 platforms.\n");
+    return -1;
+  }
+  log_info("Found %d platforms.\n", num_platforms);
+
+  // platforms[] only holds 16 ids; clamp so the loop never reads entries that were not written
+  const int platforms_to_check = (num_platforms > 16) ? 16 : (int)num_platforms;
+  for (int p=0; p<platforms_to_check; p++) {
+    cl_device_id *devices;
+    cl_uint num_devices;
+
+    log_info("Platform %d (%p):\n", p, platforms[p]);
+
+    err = check_platform_string(platforms[p], CL_PLATFORM_PROFILE, "CL_PLATFORM_PROFILE", string_returned, &total_errors);
+    test_error(err, "clGetPlatformInfo for CL_PLATFORM_PROFILE failed");
+    err = check_platform_string(platforms[p], CL_PLATFORM_VERSION, "CL_PLATFORM_VERSION", string_returned, &total_errors);
+    test_error(err, "clGetPlatformInfo for CL_PLATFORM_VERSION failed");
+    err = check_platform_string(platforms[p], CL_PLATFORM_NAME, "CL_PLATFORM_NAME", string_returned, &total_errors);
+    test_error(err, "clGetPlatformInfo for CL_PLATFORM_NAME failed");
+    err = check_platform_string(platforms[p], CL_PLATFORM_VENDOR, "CL_PLATFORM_VENDOR", string_returned, &total_errors);
+    test_error(err, "clGetPlatformInfo for CL_PLATFORM_VENDOR failed");
+    err = check_platform_string(platforms[p], CL_PLATFORM_EXTENSIONS, "CL_PLATFORM_EXTENSIONS", string_returned, &total_errors);
+    test_error(err, "clGetPlatformInfo for CL_PLATFORM_EXTENSIONS failed");
+
+    err = clGetDeviceIDs(platforms[p], CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices);
+    test_error(err, "clGetDeviceIDs size failed.\n");
+    devices = (cl_device_id *)malloc(num_devices*sizeof(cl_device_id));
+    if (devices == NULL) {
+      log_error("Unable to allocate memory for %d device ids.\n", (int)num_devices);
+      return -1;
+    }
+    memset(devices, 0, sizeof(cl_device_id)*num_devices);
+    err = clGetDeviceIDs(platforms[p], CL_DEVICE_TYPE_ALL, num_devices, devices, NULL);
+    if (err != CL_SUCCESS) {
+      free(devices); // test_error is expected to return here; free the list first
+      test_error(err, "clGetDeviceIDs failed.\n");
+    }
+
+    log_info("\tPlatform has %d devices.\n", (int)num_devices);
+    for (int d=0; d<(int)num_devices && err == CL_SUCCESS; d++)
+      err = check_platform_device(platforms[p], devices[d], d, string_returned, &total_errors);
+    free(devices);
+    if (err != CL_SUCCESS)
+      return err;
+  }
+
+  return total_errors;
+}
--- a/test_conformance/compatibility/test_conformance/api/test_queries.cpp
+++ b/test_conformance/compatibility/test_conformance/api/test_queries.cpp
@@ -0,0 +1,635 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "testBase.h"
+#include "../../test_common/harness/imageHelpers.h"
+#include <stdlib.h>
+#include <ctype.h>
+
+// Validates the CL_PLATFORM_PROFILE and CL_PLATFORM_VERSION strings of the
+// first platform returned by clGetPlatformIDs.
+//
+//  - The profile must be "FULL_PROFILE" or "EMBEDDED_PROFILE".
+//  - The version must match "OpenCL *[0-9]+\.[0-9]+ " and be at least 1.2.
+//  - For both strings, the size reported by clGetPlatformInfo (with a buffer
+//    and with a size-only query) must equal strlen + 1.
+//
+// deviceID, context, queue and num_elements are not used by this test.
+// Returns 0 on success and -1 on a validation failure; failing API calls are
+// handed to test_error.
+int test_get_platform_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    cl_platform_id platform;
+    cl_int error;
+    char buffer[ 4098 ];
+    size_t length;
+
+    // Get the platform to use
+    error = clGetPlatformIDs(1, &platform, NULL);
+    test_error( error, "Unable to get platform" );
+
+    // Platform profile should either be FULL_PROFILE or EMBEDDED_PROFILE
+    error = clGetPlatformInfo(platform,  CL_PLATFORM_PROFILE, sizeof( buffer ), buffer, &length );
+    test_error( error, "Unable to get platform profile string" );
+
+    log_info("Returned CL_PLATFORM_PROFILE %s.\n", buffer);
+
+    if( strcmp( buffer, "FULL_PROFILE" ) != 0 && strcmp( buffer, "EMBEDDED_PROFILE" ) != 0 )
+    {
+        log_error( "ERROR: Returned platform profile string is not a valid string by OpenCL 1.2! (Returned: %s)\n", buffer );
+        return -1;
+    }
+    if( strlen( buffer )+1 != length )
+    {
+        log_error( "ERROR: Returned length of profile string is incorrect (actual length: %d, returned length: %d)\n",
+                  (int)strlen( buffer )+1, (int)length );
+        return -1;
+    }
+
+    // Check just length return
+    error = clGetPlatformInfo(platform,  CL_PLATFORM_PROFILE, 0, NULL, &length );
+    test_error( error, "Unable to get platform profile length" );
+    if( strlen( buffer )+1 != length )
+    {
+        log_error( "ERROR: Returned length of profile string is incorrect (actual length: %d, returned length: %d)\n",
+                  (int)strlen( buffer )+1, (int)length );
+        return -1;
+    }
+
+
+    // Platform version should fit the regex "OpenCL *[0-9]+\.[0-9]+"
+    error = clGetPlatformInfo(platform,  CL_PLATFORM_VERSION, sizeof( buffer ), buffer, &length );
+    test_error( error, "Unable to get platform version string" );
+
+    log_info("Returned CL_PLATFORM_VERSION %s.\n", buffer);
+
+    if( memcmp( buffer, "OpenCL ", strlen( "OpenCL " ) ) != 0 )
+    {
+        log_error( "ERROR: Initial part of platform version string does not match required format! (returned: %s)\n", buffer );
+        return -1;
+    }
+
+    // p1 -> first major digit, p2 -> the '.', p3 -> first character after the minor digits.
+    char *p1 = buffer + strlen( "OpenCL " );
+    while( *p1 == ' ' )
+        p1++;
+    char *p2 = p1;
+    // <ctype.h> functions require a value representable as unsigned char.
+    while( isdigit( (unsigned char)*p2 ) )
+        p2++;
+    // "[0-9]+" needs at least one major digit, and it must be followed by '.'.
+    if( p2 == p1 || *p2 != '.' )
+    {
+        log_error( "ERROR: Numeric part of platform version string does not match required format! (returned: %s)\n", buffer );
+        return -1;
+    }
+    char *p3 = p2 + 1;
+    while( isdigit( (unsigned char)*p3 ) )
+        p3++;
+    // Likewise the minor version needs at least one digit.
+    if( p3 == p2 + 1 )
+    {
+        log_error( "ERROR: Numeric part of platform version string does not match required format! (returned: %s)\n", buffer );
+        return -1;
+    }
+    if( *p3 != ' ' )
+    {
+        log_error( "ERROR: space expected after minor version number! (returned: %s)\n", buffer );
+        return -1;
+    }
+    *p2 = ' '; // Put in a space for atoi below.
+    p2++;
+
+    // make sure it is null terminated
+    for( ; p3 != buffer + length; p3++ )
+        if( *p3 == '\0' )
+            break;
+    if( p3 == buffer + length )
+    {
+        log_error( "ERROR: platform version string is not NUL terminated!\n" );
+        return -1;
+    }
+
+    int major = atoi( p1 );
+    int minor = atoi( p2 );
+    int minor_revision = 2;
+    if( major * 10 + minor < 10 + minor_revision )
+    {
+        log_error( "ERROR: OpenCL platform version returned is less than 1.%d!\n", minor_revision );
+        return -1;
+    }
+
+    // Sanity checks on the returned values
+    // (the '.' was replaced by ' ' above, which leaves the string length unchanged)
+    if( length != strlen( buffer ) + 1)
+    {
+        log_error( "ERROR: Returned length of version string does not match actual length (actual: %d, returned: %d)\n", (int)strlen( buffer )+1, (int)length );
+        return -1;
+    }
+
+    // Check just length
+    error = clGetPlatformInfo(platform,  CL_PLATFORM_VERSION, 0, NULL, &length );
+    test_error( error, "Unable to get platform version length" );
+    if( length != strlen( buffer )+1 )
+    {
+        log_error( "ERROR: Returned length of version string does not match actual length (actual: %d, returned: %d)\n", (int)strlen( buffer )+1, (int)length );
+        return -1;
+    }
+
+    return 0;
+}
+
+// Creates a sampler (normalized coords, CL_ADDRESS_CLAMP, CL_FILTER_LINEAR) and
+// checks that clGetSamplerInfo reports those creation parameters, the creating
+// context, and the expected result size for every query.
+//
+// The reference count is only checked for its reported size, not its value.
+// queue and num_elements are not used.
+// Returns 0 on success and -1 on a validation failure; failing API calls are
+// handed to test_error.
+int test_get_sampler_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    int error;
+    size_t size;
+
+    PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID )
+
+    clSamplerWrapper sampler = clCreateSampler( context, CL_TRUE, CL_ADDRESS_CLAMP, CL_FILTER_LINEAR, &error );
+    test_error( error, "Unable to create sampler to test with" );
+
+    cl_uint refCount;
+    error = clGetSamplerInfo( sampler, CL_SAMPLER_REFERENCE_COUNT, sizeof( refCount ), &refCount, &size );
+    test_error( error, "Unable to get sampler ref count" );
+    if( size != sizeof( refCount ) )
+    {
+        log_error( "ERROR: Returned size of sampler refcount does not validate! (expected %d, got %d)\n", (int)sizeof( refCount ), (int)size );
+        return -1;
+    }
+
+    cl_context otherCtx;
+    error = clGetSamplerInfo( sampler, CL_SAMPLER_CONTEXT, sizeof( otherCtx ), &otherCtx, &size );
+    test_error( error, "Unable to get sampler context" );
+    if( otherCtx != context )
+    {
+        log_error( "ERROR: Sampler context does not validate! (expected %p, got %p)\n", context, otherCtx );
+        return -1;
+    }
+    if( size != sizeof( otherCtx ) )
+    {
+        log_error( "ERROR: Returned size of sampler context does not validate! (expected %d, got %d)\n", (int)sizeof( otherCtx ), (int)size );
+        return -1;
+    }
+
+    cl_addressing_mode mode;
+    error = clGetSamplerInfo( sampler, CL_SAMPLER_ADDRESSING_MODE, sizeof( mode ), &mode, &size );
+    test_error( error, "Unable to get sampler addressing mode" );
+    if( mode != CL_ADDRESS_CLAMP )
+    {
+        log_error( "ERROR: Sampler addressing mode does not validate! (expected %d, got %d)\n", (int)CL_ADDRESS_CLAMP, (int)mode );
+        return -1;
+    }
+    if( size != sizeof( mode ) )
+    {
+        log_error( "ERROR: Returned size of sampler addressing mode does not validate! (expected %d, got %d)\n", (int)sizeof( mode ), (int)size );
+        return -1;
+    }
+
+    cl_filter_mode fmode;
+    error = clGetSamplerInfo( sampler, CL_SAMPLER_FILTER_MODE, sizeof( fmode ), &fmode, &size );
+    test_error( error, "Unable to get sampler filter mode" );
+    if( fmode != CL_FILTER_LINEAR )
+    {
+        log_error( "ERROR: Sampler filter mode does not validate! (expected %d, got %d)\n", (int)CL_FILTER_LINEAR, (int)fmode );
+        return -1;
+    }
+    if( size != sizeof( fmode ) )
+    {
+        log_error( "ERROR: Returned size of sampler filter mode does not validate! (expected %d, got %d)\n", (int)sizeof( fmode ), (int)size );
+        return -1;
+    }
+
+    // CL_SAMPLER_NORMALIZED_COORDS is documented to return a cl_bool.
+    cl_bool norm;
+    error = clGetSamplerInfo( sampler, CL_SAMPLER_NORMALIZED_COORDS, sizeof( norm ), &norm, &size );
+    test_error( error, "Unable to get sampler normalized flag" );
+    if( norm != CL_TRUE )
+    {
+        log_error( "ERROR: Sampler normalized flag does not validate! (expected %d, got %d)\n", (int)CL_TRUE, (int)norm );
+        return -1;
+    }
+    if( size != sizeof( norm ) )
+    {
+        log_error( "ERROR: Returned size of sampler normalized flag does not validate! (expected %d, got %d)\n", (int)sizeof( norm ), (int)size );
+        return -1;
+    }
+
+    return 0;
+}
+
+// Queries one clGetCommandQueueInfo parameter into `val` and makes the
+// enclosing function return -1 when the value differs from `expected` or the
+// reported size differs from sizeof( val ).
+//   name - string literal spliced into the log messages
+//   type - printf conversion (string literal) used to print both values
+//   cast - type both values are cast to before printing
+// Requires locals named `error` and `size` in the enclosing scope. The macro
+// expands to several statements, so it must not be used as the unbraced body
+// of an if/else/loop. Arguments are parenthesized so expressions can be passed.
+#define TEST_COMMAND_QUEUE_PARAM( queue, paramName, val, expected, name, type, cast )    \
+error = clGetCommandQueueInfo( (queue), (paramName), sizeof( val ), &(val), &size );        \
+test_error( error, "Unable to get command queue " name );                            \
+if( (val) != (expected) )                                                                \
+{                                                                                    \
+log_error( "ERROR: Command queue " name " did not validate! (expected " type ", got " type ")\n", (cast)(expected), (cast)(val) );    \
+return -1;                                                                        \
+}            \
+if( size != sizeof( val ) )                \
+{                                        \
+log_error( "ERROR: Returned size of command queue " name " does not validate! (expected %d, got %d)\n", (int)sizeof( val ), (int)size );    \
+return -1;    \
+}
+
+// Creates a command queue using the device's reported CL_DEVICE_QUEUE_PROPERTIES
+// and checks that clGetCommandQueueInfo returns the expected context, device
+// and properties, with the expected result size for every query.
+//
+// The reference count is only checked for its reported size. The device is
+// compared through CL_DEVICE_VENDOR_ID rather than by handle.
+// ignoreQueue and num_elements are not used.
+// Returns 0 on success and -1 on a validation failure; failing API calls are
+// handed to test_error.
+int test_get_command_queue_info(cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements)
+{
+    int error;
+    size_t size;
+
+    // The result feeds clCreateCommandQueue below, so a failed query must not
+    // be allowed to pass an indeterminate value along.
+    cl_command_queue_properties device_props = 0;
+    error = clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_PROPERTIES, sizeof(device_props), &device_props, NULL);
+    test_error( error, "Unable to get device CL_DEVICE_QUEUE_PROPERTIES" );
+    log_info("CL_DEVICE_QUEUE_PROPERTIES is %d\n", (int)device_props);
+
+    clCommandQueueWrapper queue = clCreateCommandQueue( context, deviceID, device_props, &error );
+    test_error( error, "Unable to create command queue to test with" );
+
+    cl_uint refCount;
+    error = clGetCommandQueueInfo( queue, CL_QUEUE_REFERENCE_COUNT, sizeof( refCount ), &refCount, &size );
+    test_error( error, "Unable to get command queue reference count" );
+    if( size != sizeof( refCount ) )
+    {
+        log_error( "ERROR: Returned size of command queue reference count does not validate! (expected %d, got %d)\n", (int)sizeof( refCount ), (int)size );
+        return -1;
+    }
+
+    cl_context otherCtx;
+    TEST_COMMAND_QUEUE_PARAM( queue, CL_QUEUE_CONTEXT, otherCtx, context, "context", "%p", cl_context )
+
+    cl_device_id otherDevice;
+    error = clGetCommandQueueInfo( queue, CL_QUEUE_DEVICE, sizeof(otherDevice), &otherDevice, &size);
+    test_error(error, "clGetCommandQueue failed.");
+
+    if (size != sizeof(cl_device_id)) {
+        log_error( " ERROR: Returned size of command queue CL_QUEUE_DEVICE does not validate! (expected %d, got %d)\n", (int)sizeof( otherDevice ), (int)size );
+        return -1;
+    }
+
+    /* Since the device IDs are opaque types we check the CL_DEVICE_VENDOR_ID which is unique for identical hardware. */
+    cl_uint otherDevice_vid, deviceID_vid;
+    error = clGetDeviceInfo(otherDevice, CL_DEVICE_VENDOR_ID, sizeof(otherDevice_vid), &otherDevice_vid, NULL );
+    test_error( error, "Unable to get device CL_DEVICE_VENDOR_ID" );
+    error = clGetDeviceInfo(deviceID, CL_DEVICE_VENDOR_ID, sizeof(deviceID_vid), &deviceID_vid, NULL );
+    test_error( error, "Unable to get device CL_DEVICE_VENDOR_ID" );
+
+    if( otherDevice_vid != deviceID_vid )
+    {
+        log_error( "ERROR: Incorrect device returned for queue! (Expected vendor ID 0x%x, got 0x%x)\n", deviceID_vid, otherDevice_vid );
+        return -1;
+    }
+
+    // The queue must report the same properties it was created with.
+    cl_command_queue_properties props;
+    TEST_COMMAND_QUEUE_PARAM( queue, CL_QUEUE_PROPERTIES, props, (unsigned int)( device_props ), "properties", "%d", unsigned int )
+
+    return 0;
+}
+
+// Queries CL_CONTEXT_PROPERTIES into a single cl_context_properties slot and
+// accepts exactly two outcomes: a reported size of 0, or a reported size of
+// one cl_context_properties element whose value is 0.
+//
+// deviceID, ignoreQueue and num_elements are not used.
+// Returns 0 when the result is acceptable and -1 otherwise; a failing API call
+// is handed to test_error.
+int test_get_context_info(cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements)
+{
+    cl_context_properties props;
+    size_t reportedSize;
+    int status;
+
+    status = clGetContextInfo( context, CL_CONTEXT_PROPERTIES, sizeof( props ), &props, &reportedSize );
+    test_error( status, "Unable to get context props" );
+
+    // Nothing returned at all is a valid answer.
+    if( reportedSize == 0 )
+        return 0;
+
+    // Anything other than exactly one element is rejected.
+    if( reportedSize != sizeof( cl_context_properties ) )
+    {
+        log_error( "ERROR: Returned size of context props is not valid! (expected 0 or %d, got %d)\n",
+                  (int)sizeof(cl_context_properties), (int)reportedSize );
+        return -1;
+    }
+
+    // A single element must be the 0 terminator.
+    if( props != 0 )
+    {
+        log_error("ERROR: Returned properties is no NULL.\n");
+        return -1;
+    }
+
+    return 0;
+}
+
+// Queries one clGetMemObjectInfo parameter into `val` and makes the enclosing
+// function return -1 when the value differs from `expected` or the reported
+// size differs from sizeof( val ).
+//   name - string literal spliced into the log messages
+//   type - printf conversion (string literal) used to print both values
+//   cast - type both values are cast to before printing
+// Requires locals named `error` and `size` in the enclosing scope. The macro
+// expands to several statements, so it must not be used as the unbraced body
+// of an if/else/loop.
+// NOTE(review): `val` and `expected` are not parenthesized in the comparison,
+// so pass simple expressions only.
+#define TEST_MEM_OBJECT_PARAM( mem, paramName, val, expected, name, type, cast )    \
+error = clGetMemObjectInfo( mem, paramName, sizeof( val ), &val, &size );        \
+test_error( error, "Unable to get mem object " name );                            \
+if( val != expected )                                                                \
+{                                                                                    \
+log_error( "ERROR: Mem object " name " did not validate! (expected " type ", got " type ")\n", (cast)(expected), (cast)val );    \
+return -1;                                                                        \
+}            \
+if( size != sizeof( val ) )                \
+{                                        \
+log_error( "ERROR: Returned size of mem object " name " does not validate! (expected %d, got %d)\n", (int)sizeof( val ), (int)size );    \
+return -1;    \
+}
+
+// Callback with the CL_CALLBACK calling convention that releases `data` with
+// free(); the cl_mem argument is ignored.
+// NOTE(review): presumably registered as a mem object destructor callback with
+// a malloc'ed user pointer -- no registration is visible in this section.
+void CL_CALLBACK mem_obj_destructor_callback( cl_mem /* unused */, void *data )
+{
+    free( data );
+}
+
+// Table of cl_mem_flags combinations: no flags, each access qualifier
+// (READ_WRITE / READ_ONLY / WRITE_ONLY) on its own, and each access qualifier
+// combined with COPY_HOST_PTR, ALLOC_HOST_PTR, ALLOC_HOST_PTR | COPY_HOST_PTR,
+// or USE_HOST_PTR.
+// NOTE(review): host-pointer flags without an access qualifier are not listed,
+// and no user of this table is visible in this section of the file.
+static cl_mem_flags all_flags[16] = {
+  0,
+  CL_MEM_READ_WRITE,
+  CL_MEM_READ_ONLY,
+  CL_MEM_WRITE_ONLY,
+  CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
+  CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
+  CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
+  CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
+  CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
+  CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
+  CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
+  CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
+  CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
+  CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
+  CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
+  CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
+};
+
+// Queries one clGetDeviceInfo parameter into `val`, makes the enclosing
+// function return -1 when the reported size differs from sizeof( val ), and
+// otherwise logs the value. The value itself is not validated.
+//   name - string literal spliced into the log messages
+//   type - printf conversion (string literal) used to print the value
+//   cast - type the value is cast to before printing
+// Requires locals named `error` and `size` in the enclosing scope. The macro
+// expands to several statements, so it must not be used as the unbraced body
+// of an if/else/loop.
+#define TEST_DEVICE_PARAM( device, paramName, val, name, type, cast )    \
+error = clGetDeviceInfo( device, paramName, sizeof( val ), &val, &size );        \
+test_error( error, "Unable to get device " name );                            \
+if( size != sizeof( val ) )                \
+{                                        \
+log_error( "ERROR: Returned size of device " name " does not validate! (expected %d, got %d)\n", (int)sizeof( val ), (int)size );    \
+return -1;    \
+}                \
+log_info( "\tReported device " name " : " type "\n", (cast)val );
+
+// Variant of TEST_DEVICE_PARAM for memory sizes: queries one clGetDeviceInfo
+// parameter into `val`, makes the enclosing function return -1 when the
+// reported size differs from sizeof( val ), and otherwise logs val / div as an
+// int (so `type` must be an int conversion such as "%d KB").
+//   name - string literal spliced into the log messages
+//   div  - divisor applied before printing (e.g. 1024 for KB)
+// Requires locals named `error` and `size` in the enclosing scope. The macro
+// expands to several statements, so it must not be used as the unbraced body
+// of an if/else/loop. `val` and `div` are parenthesized so that an expression
+// such as 1024 * 1024 divides correctly.
+#define TEST_DEVICE_PARAM_MEM( device, paramName, val, name, type, div )    \
+error = clGetDeviceInfo( (device), (paramName), sizeof( val ), &(val), &size );        \
+test_error( error, "Unable to get device " name );                            \
+if( size != sizeof( val ) )                \
+{                                        \
+log_error( "ERROR: Returned size of device " name " does not validate! (expected %d, got %d)\n", (int)sizeof( val ), (int)size );    \
+return -1;    \
+}                \
+log_info( "\tReported device " name " : " type "\n", (int)( (val) / (div) ) );
+
+// Queries a series of clGetDeviceInfo parameters for deviceID, checks that
+// each query reports the expected result size, and logs the reported values.
+//
+// Values are mostly informational; only the profile string is validated
+// (it must be "FULL_PROFILE" or "EMBEDDED_PROFILE"). String queries must
+// report a size of strlen + 1.
+// context, ignoreQueue and num_elements are not used.
+// Returns 0 on success and -1 on a validation failure; failing API calls are
+// handed to test_error.
+int test_get_device_info(cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements)
+{
+    int error;
+    size_t size;
+
+    cl_uint vendorID;
+    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_VENDOR_ID, vendorID, "vendor ID", "0x%08x", int )
+
+    char extensions[ 10240 ];
+    error = clGetDeviceInfo( deviceID, CL_DEVICE_EXTENSIONS, sizeof( extensions ), extensions, &size );
+    test_error( error, "Unable to get device extensions" );
+    if( size != strlen( extensions ) + 1 )
+    {
+        log_error( "ERROR: Returned size of device extensions does not validate! (expected %d, got %d)\n", (int)( strlen( extensions ) + 1 ), (int)size );
+        return -1;
+    }
+    log_info( "\tReported device extensions: %s \n", extensions );
+
+    cl_uint preferred;
+    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, preferred, "preferred vector char width", "%d", int )
+    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, preferred, "preferred vector short width", "%d", int )
+    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, preferred, "preferred vector int width", "%d", int )
+    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, preferred, "preferred vector long width", "%d", int )
+    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, preferred, "preferred vector float width", "%d", int )
+    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, preferred, "preferred vector double width", "%d", int )
+
+    // Note that even if cl_khr_fp64, the preferred width for double can be non-zero.  For example, vendors
+    // extensions can support double but may not support cl_khr_fp64, which implies math library support.
+
+    cl_uint baseAddrAlign;
+    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_MEM_BASE_ADDR_ALIGN, baseAddrAlign, "base address alignment", "%d bytes", int )
+
+    cl_uint minDataAlign;
+    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, minDataAlign, "min data type alignment", "%d bytes", int )
+
+    cl_device_mem_cache_type cacheType;
+    error = clGetDeviceInfo( deviceID, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, sizeof( cacheType ), &cacheType, &size );
+    test_error( error, "Unable to get device global mem cache type" );
+    if( size != sizeof( cacheType ) )
+    {
+        log_error( "ERROR: Returned size of device global mem cache type does not validate! (expected %d, got %d)\n", (int)sizeof( cacheType ), (int)size );
+        return -1;
+    }
+    const char *cacheTypeName = ( cacheType == CL_NONE ) ? "CL_NONE" : ( cacheType == CL_READ_ONLY_CACHE ) ? "CL_READ_ONLY_CACHE" : ( cacheType == CL_READ_WRITE_CACHE ) ? "CL_READ_WRITE_CACHE" : "<unknown>";
+    log_info( "\tReported device global mem cache type: %s \n", cacheTypeName );
+
+    cl_uint cachelineSize;
+    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, cachelineSize, "global mem cacheline size", "%d bytes", int )
+
+    cl_ulong cacheSize;
+    TEST_DEVICE_PARAM_MEM( deviceID, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, cacheSize, "global mem cache size", "%d KB", 1024 )
+
+    cl_ulong memSize;
+    TEST_DEVICE_PARAM_MEM( deviceID, CL_DEVICE_GLOBAL_MEM_SIZE, memSize, "global mem size", "%d MB", ( 1024 * 1024 ) )
+
+    cl_device_local_mem_type localMemType;
+    error = clGetDeviceInfo( deviceID, CL_DEVICE_LOCAL_MEM_TYPE, sizeof( localMemType ), &localMemType, &size );
+    test_error( error, "Unable to get device local mem type" );
+    // Compare against the variable that was actually queried, not cacheType.
+    if( size != sizeof( localMemType ) )
+    {
+        log_error( "ERROR: Returned size of device local mem type does not validate! (expected %d, got %d)\n", (int)sizeof( localMemType ), (int)size );
+        return -1;
+    }
+    const char *localMemTypeName = ( localMemType == CL_LOCAL ) ? "CL_LOCAL" : ( localMemType == CL_GLOBAL ) ? "CL_GLOBAL" : "<unknown>";
+    log_info( "\tReported device local mem type: %s \n", localMemTypeName );
+
+
+    cl_bool errSupport;
+    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_ERROR_CORRECTION_SUPPORT, errSupport, "error correction support", "%d", int )
+
+    size_t timerResolution;
+    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_PROFILING_TIMER_RESOLUTION, timerResolution, "profiling timer resolution", "%ld nanoseconds", long )
+
+    cl_bool endian;
+    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_ENDIAN_LITTLE, endian, "little endian flag", "%d", int )
+
+    cl_bool avail;
+    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_AVAILABLE, avail, "available flag", "%d", int )
+
+    cl_bool compilerAvail;
+    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_COMPILER_AVAILABLE, compilerAvail, "compiler available flag", "%d", int )
+
+    char profile[ 1024 ];
+    error = clGetDeviceInfo( deviceID, CL_DEVICE_PROFILE, sizeof( profile ), profile, &size );
+    test_error( error, "Unable to get device profile" );
+    if( size != strlen( profile ) + 1 )
+    {
+        log_error( "ERROR: Returned size of device profile does not validate! (expected %d, got %d)\n", (int)( strlen( profile ) + 1 ), (int)size );
+        return -1;
+    }
+    if( strcmp( profile, "FULL_PROFILE" ) != 0 && strcmp( profile, "EMBEDDED_PROFILE" ) != 0 )
+    {
+        log_error( "ERROR: Returned profile of device not FULL or EMBEDDED as required by OpenCL 1.2! (Returned %s)\n", profile );
+        return -1;
+    }
+    log_info( "\tReported device profile: %s \n", profile );
+
+
+    return 0;
+}
+
+
+
+
+// Kernel sources used by test_kernel_required_group_size:
+//   [0] a copy kernel with no work-group size attribute;
+//   [1] the same kernel with a reqd_work_group_size attribute. This entry is a
+//       printf-style format string with three %d placeholders (x, y, z) and is
+//       expanded with sprintf before being compiled.
+static const char *sample_compile_size[2] = {
+    "__kernel void sample_test(__global int *src, __global int *dst)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "     dst[tid] = src[tid];\n"
+    "\n"
+    "}\n",
+    "__kernel __attribute__((reqd_work_group_size(%d,%d,%d))) void sample_test(__global int *src, __global int *dst)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "     dst[tid] = src[tid];\n"
+    "\n"
+    "}\n" };
+
+// Tests CL_KERNEL_COMPILE_WORK_GROUP_SIZE and enforcement of the
+// reqd_work_group_size kernel attribute.
+//
+//  1. Builds the plain kernel, checks that the compile work-group size is
+//     reported as (0,0,0) with the expected result size, and picks a local
+//     size for the fixed global size {64,14,10} with the
+//     get_max_common_*_work_group_size helpers.
+//  2. Builds the kernel again with reqd_work_group_size set to that local
+//     size, checks that it is reported back, runs the kernel once with the
+//     required size, then bumps one dimension at a time and expects
+//     CL_INVALID_WORK_GROUP_SIZE from clEnqueueNDRangeKernel.
+//
+// num_elements is not used.
+// Returns 0 on success, -1 on a validation failure, or the non-zero result of
+// create_single_kernel_helper; failing API calls are handed to test_error.
+int test_kernel_required_group_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    int error;
+    size_t realSize;
+    size_t kernel_max_workgroup_size;
+    size_t global[] = {64,14,10};
+    size_t local[] = {0,0,0};
+
+    cl_uint max_dimensions;
+
+    error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(max_dimensions), &max_dimensions, NULL);
+    test_error(error,  "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS");
+    log_info("Device reported CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS = %d.\n", (int)max_dimensions);
+
+    {
+        clProgramWrapper program;
+        clKernelWrapper kernel;
+
+        error = create_single_kernel_helper( context, &program, &kernel, 1, &sample_compile_size[ 0 ], "sample_test" );
+        if( error != 0 )
+            return error;
+
+        error = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof(kernel_max_workgroup_size), &kernel_max_workgroup_size, NULL);
+        test_error( error, "clGetKernelWorkGroupInfo failed for CL_KERNEL_WORK_GROUP_SIZE");
+        log_info("The CL_KERNEL_WORK_GROUP_SIZE for the kernel is %d.\n", (int)kernel_max_workgroup_size);
+
+        size_t size[ 3 ];
+        error = clGetKernelWorkGroupInfo( kernel, deviceID, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, sizeof( size ), size, &realSize );
+        test_error( error, "Unable to get work group info" );
+
+        if( size[ 0 ] != 0 || size[ 1 ] != 0 || size[ 2 ] != 0 )
+        {
+            log_error( "ERROR: Nonzero compile work group size returned for nonspecified size! (returned %d,%d,%d)\n", (int)size[0], (int)size[1], (int)size[2] );
+            return -1;
+        }
+
+        if( realSize != sizeof( size ) )
+        {
+            log_error( "ERROR: Returned size of compile work group size not valid! (Expected %d, got %d)\n", (int)sizeof( size ), (int)realSize );
+            return -1;
+        }
+
+        // Determine some local dimensions to use for the test.
+        // NOTE(review): in the 1D case only local[0] is passed to the helper, so
+        // local[1] and local[2] keep their initial 0 -- confirm this is intended.
+        if (max_dimensions == 1) {
+            error = get_max_common_work_group_size(context, kernel, global[0], &local[0]);
+            test_error( error, "get_max_common_work_group_size failed");
+            log_info("For global dimension %d, kernel will require local dimension %d.\n", (int)global[0], (int)local[0]);
+        } else if (max_dimensions == 2) {
+            error = get_max_common_2D_work_group_size(context, kernel, global, local);
+            test_error( error, "get_max_common_2D_work_group_size failed");
+            log_info("For global dimension %d x %d, kernel will require local dimension %d x %d.\n", (int)global[0], (int)global[1], (int)local[0], (int)local[1]);
+        } else {
+            error = get_max_common_3D_work_group_size(context, kernel, global, local);
+            test_error( error, "get_max_common_3D_work_group_size failed");
+            log_info("For global dimension %d x %d x %d, kernel will require local dimension %d x %d x %d.\n",
+                     (int)global[0], (int)global[1], (int)global[2], (int)local[0], (int)local[1], (int)local[2]);
+        }
+    }
+
+
+    {
+        clProgramWrapper program;
+        clKernelWrapper kernel;
+        clMemWrapper in, out;
+
+        // The source lives on the stack so that none of the early returns
+        // below can leak it; sourcePtr supplies the const char ** the helper takes.
+        char source[ 1024 ];
+        const char *sourcePtr = source;
+
+        // The format string uses %d, so the size_t dimensions are narrowed explicitly.
+        sprintf(source, sample_compile_size[1], (int)local[0], (int)local[1], (int)local[2]);
+
+        error = create_single_kernel_helper( context, &program, &kernel, 1, &sourcePtr, "sample_test" );
+        if( error != 0 )
+            return error;
+
+        size_t size[ 3 ];
+        error = clGetKernelWorkGroupInfo( kernel, deviceID, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, sizeof( size ), size, &realSize );
+        test_error( error, "Unable to get work group info" );
+
+        if( size[ 0 ] != local[0] || size[ 1 ] != local[1] || size[ 2 ] != local[2] )
+        {
+            log_error( "ERROR: Incorrect compile work group size returned for specified size! (returned %d,%d,%d, expected %d,%d,%d)\n",
+                      (int)size[0], (int)size[1], (int)size[2], (int)local[0], (int)local[1], (int)local[2]);
+            return -1;
+        }
+
+        // Verify that the kernel will only execute with that size.
+        in = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(cl_int)*global[0], NULL, &error);
+        test_error(error, "clCreateBuffer failed");
+        out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_int)*global[0], NULL, &error);
+        test_error(error, "clCreateBuffer failed");
+
+        error = clSetKernelArg(kernel, 0, sizeof(in), &in);
+        test_error(error, "clSetKernelArg failed");
+        error = clSetKernelArg(kernel, 1, sizeof(out), &out);
+        test_error(error, "clSetKernelArg failed");
+
+        error = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global, local, 0, NULL, NULL);
+        test_error(error, "clEnqueueNDRangeKernel failed");
+
+        error = clFinish(queue);
+        test_error(error, "clFinish failed");
+
+        log_info("kernel_required_group_size may report spurious ERRORS in the conformance log.\n");
+
+        // Bump one dimension at a time past the required size; each launch
+        // must be rejected. Testing stops after the last dimension the device
+        // reports supporting.
+        const size_t required[ 3 ] = { local[0], local[1], local[2] };
+        for( cl_uint dim = 0; dim < 3; dim++ )
+        {
+            size_t wrong[ 3 ] = { required[0], required[1], required[2] };
+            wrong[ dim ]++;
+
+            error = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global, wrong, 0, NULL, NULL);
+            if (error != CL_INVALID_WORK_GROUP_SIZE) {
+                log_error("Incorrect error returned for executing a kernel with the wrong required local work group size. (used %d,%d,%d, required %d,%d,%d)\n",
+                          (int)wrong[0], (int)wrong[1], (int)wrong[2], (int)required[0], (int)required[1], (int)required[2] );
+                print_error(error, "Expected: CL_INVALID_WORK_GROUP_SIZE.");
+                return -1;
+            }
+
+            error = clFinish(queue);
+            test_error(error, "clFinish failed");
+
+            if( max_dimensions == dim + 1 )
+                break;
+        }
+    }
+
+    return 0;
+}
+
+
--- a/test_conformance/compatibility/test_conformance/api/test_retain.cpp
+++ b/test_conformance/compatibility/test_conformance/api/test_retain.cpp
@@ -0,0 +1,234 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "testBase.h"
+#if !defined(_WIN32)
+#include <unistd.h>
+#endif // !_WIN32
+
+// Note: According to spec, the various functions to get instance counts should return an error when passed in an object
+// that has already been released. However, the spec is out of date. If it gets re-updated to allow such action, re-enable
+// this define.
+//#define VERIFY_AFTER_RELEASE    1
+
+// Queries the reference count of command queue `p` (CL_QUEUE_REFERENCE_COUNT).
+// Relies on two locals of the calling function: the CL status is stored in `err`, and the
+// count is stored in `numInstances`, which is forced to 0 when the query does not succeed.
+#define GET_QUEUE_INSTANCE_COUNT(p) numInstances = ( (err = clGetCommandQueueInfo(p, CL_QUEUE_REFERENCE_COUNT, sizeof( numInstances ), &numInstances, NULL)) == CL_SUCCESS ? numInstances : 0 )
+// Memory-object counterpart of the macro above: queries CL_MEM_REFERENCE_COUNT of `p`
+// through clGetMemObjectInfo, with the same use of the caller's `err` and `numInstances`.
+#define GET_MEM_INSTANCE_COUNT(p) numInstances = ( (err = clGetMemObjectInfo(p, CL_MEM_REFERENCE_COUNT, sizeof( numInstances ), &numInstances, NULL)) == CL_SUCCESS ? numInstances : 0 )
+
+#define VERIFY_INSTANCE_COUNT(c,rightValue) if( c != rightValue ) { \
+log_error( "ERROR: Instance count for test object is not valid! (should be %d, really is %d)\n", rightValue, c ); \
+return -1;    }
+
+/*
+ * Creates a command queue, checks that it reports a reference count of 1, then releases it.
+ *
+ * The queue supplied by the harness (queueNotUsed) and num_elements are not used.
+ * Returns 0 on success. Failures leave the function early through the harness macro
+ * test_error (defined outside this file) or through VERIFY_INSTANCE_COUNT, which returns -1.
+ */
+int test_retain_queue_single(cl_device_id deviceID, cl_context context, cl_command_queue queueNotUsed, int num_elements)
+{
+    cl_command_queue queue;
+    cl_uint numInstances;
+    int err;
+
+
+    /* Create a test queue */
+    queue = clCreateCommandQueue( context, deviceID, 0, &err );
+    test_error( err, "Unable to create command queue to test with" );
+
+    /* A freshly created queue must report exactly one reference */
+    GET_QUEUE_INSTANCE_COUNT( queue );
+    test_error( err, "Unable to get queue instance count" );
+    VERIFY_INSTANCE_COUNT( numInstances, 1 );
+
+    /* Now release the queue; a failing release is itself a test failure */
+    err = clReleaseCommandQueue( queue );
+    test_error( err, "Unable to release command queue" );
+#ifdef VERIFY_AFTER_RELEASE
+    /* We're not allowed to get the instance count after the object has been completely released. But that's
+     exactly how we can tell the release worked--by making sure getting the instance count fails! */
+    GET_QUEUE_INSTANCE_COUNT( queue );
+    if( err != CL_INVALID_COMMAND_QUEUE )
+    {
+        print_error( err, "Command queue was not properly released" );
+        return -1;
+    }
+#endif
+
+    return 0;
+}
+
+/*
+ * Drives a command queue's reference count through a retain/release sequence
+ * (1 -> 10 -> 5 -> 8 -> 1 -> released) and verifies the reported count at each stop.
+ *
+ * Every clRetainCommandQueue / clReleaseCommandQueue status is checked, so a failing
+ * retain or release is reported directly instead of only showing up as a wrong count.
+ * The queue supplied by the harness (queueNotUsed) and num_elements are not used.
+ * Returns 0 on success; failures exit early through test_error / VERIFY_INSTANCE_COUNT.
+ */
+int test_retain_queue_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queueNotUsed, int num_elements)
+{
+    cl_command_queue queue;
+    cl_uint numInstances;
+    unsigned int i;
+    int err;
+
+
+    /* Create a test queue */
+    queue = clCreateCommandQueue( context, deviceID, 0, &err );
+    test_error( err, "Unable to create command queue to test with" );
+
+    /* Increment 9 times, which should bring the count to 10 */
+    for( i = 0; i < 9; i++ )
+    {
+        err = clRetainCommandQueue( queue );
+        test_error( err, "Unable to retain command queue" );
+    }
+
+    /* Test the instance count */
+    GET_QUEUE_INSTANCE_COUNT( queue );
+    test_error( err, "Unable to get queue instance count" );
+    VERIFY_INSTANCE_COUNT( numInstances, 10 );
+
+    /* Now release 5 times, which should take us to 5 */
+    for( i = 0; i < 5; i++ )
+    {
+        err = clReleaseCommandQueue( queue );
+        test_error( err, "Unable to release command queue" );
+    }
+
+    GET_QUEUE_INSTANCE_COUNT( queue );
+    test_error( err, "Unable to get queue instance count" );
+    VERIFY_INSTANCE_COUNT( numInstances, 5 );
+
+    /* Retain again three times, which should take us to 8 */
+    for( i = 0; i < 3; i++ )
+    {
+        err = clRetainCommandQueue( queue );
+        test_error( err, "Unable to retain command queue" );
+    }
+
+    GET_QUEUE_INSTANCE_COUNT( queue );
+    test_error( err, "Unable to get queue instance count" );
+    VERIFY_INSTANCE_COUNT( numInstances, 8 );
+
+    /* Release 7 times, which should take it to 1 */
+    for( i = 0; i < 7; i++ )
+    {
+        err = clReleaseCommandQueue( queue );
+        test_error( err, "Unable to release command queue" );
+    }
+
+    GET_QUEUE_INSTANCE_COUNT( queue );
+    test_error( err, "Unable to get queue instance count" );
+    VERIFY_INSTANCE_COUNT( numInstances, 1 );
+
+    /* And one last one, dropping the final reference */
+    err = clReleaseCommandQueue( queue );
+    test_error( err, "Unable to release command queue" );
+
+#ifdef VERIFY_AFTER_RELEASE
+    /* We're not allowed to get the instance count after the object has been completely released. But that's
+     exactly how we can tell the release worked--by making sure getting the instance count fails! */
+    GET_QUEUE_INSTANCE_COUNT( queue );
+    if( err != CL_INVALID_COMMAND_QUEUE )
+    {
+        print_error( err, "Command queue was not properly released" );
+        return -1;
+    }
+#endif
+
+    return 0;
+}
+
+/*
+ * Creates a 32-byte read-only buffer, checks that it reports a reference count of 1,
+ * then releases it.
+ *
+ * deviceID, queue and num_elements are not used.
+ * Returns 0 on success. Failures leave the function early through the harness macro
+ * test_error (defined outside this file) or through VERIFY_INSTANCE_COUNT, which returns -1.
+ */
+int test_retain_mem_object_single(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    cl_mem object;
+    cl_uint numInstances;
+    int err;
+
+
+    /* Create a test object */
+    object = clCreateBuffer( context, CL_MEM_READ_ONLY, 32, NULL, &err );
+    test_error( err, "Unable to create buffer to test with" );
+
+    /* A freshly created buffer must report exactly one reference */
+    GET_MEM_INSTANCE_COUNT( object );
+    test_error( err, "Unable to get mem object count" );
+    VERIFY_INSTANCE_COUNT( numInstances, 1 );
+
+    /* Now release the mem object; a failing release is itself a test failure */
+    err = clReleaseMemObject( object );
+    test_error( err, "Unable to release mem object" );
+#ifdef VERIFY_AFTER_RELEASE
+    /* We're not allowed to get the instance count after the object has been completely released. But that's
+     exactly how we can tell the release worked--by making sure getting the instance count fails! */
+    GET_MEM_INSTANCE_COUNT( object );
+    if( err != CL_INVALID_MEM_OBJECT )
+    {
+        print_error( err, "Mem object was not properly released" );
+        return -1;
+    }
+#endif
+
+    return 0;
+}
+
+/*
+ * Drives a buffer's reference count through a retain/release sequence
+ * (1 -> 10 -> 5 -> 8 -> 1 -> released) and verifies the reported count at each stop.
+ *
+ * Every clRetainMemObject / clReleaseMemObject status is checked, so a failing retain
+ * or release is reported directly instead of only showing up as a wrong count.
+ * deviceID, queue and num_elements are not used.
+ * Returns 0 on success; failures exit early through test_error / VERIFY_INSTANCE_COUNT.
+ */
+int test_retain_mem_object_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    cl_mem object;
+    cl_uint numInstances;
+    unsigned int i;
+    int err;
+
+
+    /* Create a test object */
+    object = clCreateBuffer( context, CL_MEM_READ_ONLY, 32, NULL, &err );
+    test_error( err, "Unable to create buffer to test with" );
+
+    /* Increment 9 times, which should bring the count to 10 */
+    for( i = 0; i < 9; i++ )
+    {
+        err = clRetainMemObject( object );
+        test_error( err, "Unable to retain mem object" );
+    }
+
+    /* Test the instance count */
+    GET_MEM_INSTANCE_COUNT( object );
+    test_error( err, "Unable to get mem object count" );
+    VERIFY_INSTANCE_COUNT( numInstances, 10 );
+
+    /* Now release 5 times, which should take us to 5 */
+    for( i = 0; i < 5; i++ )
+    {
+        err = clReleaseMemObject( object );
+        test_error( err, "Unable to release mem object" );
+    }
+
+    GET_MEM_INSTANCE_COUNT( object );
+    test_error( err, "Unable to get mem object count" );
+    VERIFY_INSTANCE_COUNT( numInstances, 5 );
+
+    /* Retain again three times, which should take us to 8 */
+    for( i = 0; i < 3; i++ )
+    {
+        err = clRetainMemObject( object );
+        test_error( err, "Unable to retain mem object" );
+    }
+
+    GET_MEM_INSTANCE_COUNT( object );
+    test_error( err, "Unable to get mem object count" );
+    VERIFY_INSTANCE_COUNT( numInstances, 8 );
+
+    /* Release 7 times, which should take it to 1 */
+    for( i = 0; i < 7; i++ )
+    {
+        err = clReleaseMemObject( object );
+        test_error( err, "Unable to release mem object" );
+    }
+
+    GET_MEM_INSTANCE_COUNT( object );
+    test_error( err, "Unable to get mem object count" );
+    VERIFY_INSTANCE_COUNT( numInstances, 1 );
+
+    /* And one last one, dropping the final reference */
+    err = clReleaseMemObject( object );
+    test_error( err, "Unable to release mem object" );
+
+#ifdef VERIFY_AFTER_RELEASE
+    /* We're not allowed to get the instance count after the object has been completely released. But that's
+     exactly how we can tell the release worked--by making sure getting the instance count fails! */
+    GET_MEM_INSTANCE_COUNT( object );
+    if( err != CL_INVALID_MEM_OBJECT )
+    {
+        print_error( err, "Mem object was not properly released" );
+        return -1;
+    }
+#endif
+
+    return 0;
+}
+
--- a/test_conformance/compatibility/test_conformance/api/test_retain_program.c
+++ b/test_conformance/compatibility/test_conformance/api/test_retain_program.c
@@ -0,0 +1,109 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "testBase.h"
+
+#if !defined(_WIN32)
+#include <unistd.h>
+#endif
+
+#include "../../test_common/harness/compat.h"
+
+/*
+ * Builds a trivial program, creates a kernel from it, then releases the program
+ * BEFORE the kernel to exercise the implementation's internal reference counting.
+ *
+ * queue and num_elements are not used.
+ * Returns 0 on success; any failing CL call exits early through the harness macro
+ * test_error (defined outside this file).
+ */
+int test_release_kernel_order(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    cl_program program;
+    cl_kernel kernel;
+    int error;
+    const char *testProgram[] = { "__kernel void sample_test(__global int *data){}" };
+
+    /* Create a test program */
+    program = clCreateProgramWithSource( context, 1, testProgram, NULL, &error);
+    test_error( error, "Unable to create program to test with" );
+
+    /* Compile the program */
+    error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
+    test_error( error, "Unable to build sample program to test with" );
+
+    /* And create a kernel from it */
+    kernel = clCreateKernel( program, "sample_test", &error );
+    test_error( error, "Unable to create kernel" );
+
+    /* Now try freeing the program first, then the kernel. If refcounts are right, this should work just fine */
+    error = clReleaseProgram( program );
+    test_error( error, "Unable to release program" );
+    error = clReleaseKernel( kernel );
+    test_error( error, "Unable to release kernel" );
+
+    /* Both releases reported success. Broken refcounting may also show up as a crash above,
+       in which case no error can be returned from here. */
+    return 0;
+}
+
+/* OpenCL C source (a single string) for the kernel "sample_test" used by
+   test_release_during_execute: each work-item runs an empty 1,000,000-iteration loop
+   and then writes (int)src[tid] to dst[tid].
+   NOTE(review): the empty loop is presumably meant as a delay so the kernel is still
+   running when the host releases its objects; a device compiler may optimize it away -- confirm. */
+const char *sample_delay_kernel[] = {
+"__kernel void sample_test(__global float *src, __global int *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"    for( int i = 0; i < 1000000; i++ ); \n"
+"    dst[tid] = (int)src[tid];\n"
+"\n"
+"}\n" };
+
+/*
+ * Enqueues the delay kernel and then releases its buffers, the kernel and the program
+ * before waiting for the queue to finish, to exercise the implementation's internal
+ * reference counting of objects that are still in use by an enqueued command.
+ *
+ * deviceID and num_elements are not used.
+ * Returns 0 on success and -1 if the kernel could not be created; any other failing
+ * CL call exits early through the harness macro test_error (defined outside this file).
+ */
+int test_release_during_execute( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    int error;
+    cl_program program;
+    cl_kernel kernel;
+    cl_mem streams[2];
+    size_t threads[1] = { 10 }, localThreadSize;
+
+
+    /* Build the delay kernel that will be in flight while its objects are released */
+    if( create_single_kernel_helper( context, &program, &kernel, 1, sample_delay_kernel, "sample_test" ) )
+    {
+        return -1;
+    }
+
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * 10, NULL, &error);
+    test_error( error, "Creating test array failed" );
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * 10, NULL, &error);
+    test_error( error, "Creating test array failed" );
+
+    /* Set the arguments */
+    error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[ 0 ]);
+    test_error( error, "Unable to set indexed kernel arguments" );
+    error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[ 1 ]);
+    test_error( error, "Unable to set indexed kernel arguments" );
+
+    error = get_max_common_work_group_size( context, kernel, threads[0], &localThreadSize );
+    test_error( error, "Unable to calc local thread size" );
+
+
+    /* Execute the kernel */
+    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, &localThreadSize, 0, NULL, NULL );
+    test_error( error, "Unable to execute test kernel" );
+
+    /* The kernel should still be executing, but we should still be able to release it. It's not terribly
+       useful, but we should be able to do it, if the internal refcounting is indeed correct. */
+
+    error = clReleaseMemObject( streams[ 1 ] );
+    test_error( error, "Unable to release mem object" );
+    error = clReleaseMemObject( streams[ 0 ] );
+    test_error( error, "Unable to release mem object" );
+    error = clReleaseKernel( kernel );
+    test_error( error, "Unable to release kernel" );
+    error = clReleaseProgram( program );
+    test_error( error, "Unable to release program" );
+
+    /* Now make sure we're really finished before we go on. */
+    error = clFinish(queue);
+    test_error( error, "Unable to finish context.");
+
+    return 0;
+}
+
+
--- a/test_conformance/compatibility/test_conformance/basic/CMakeLists.txt
+++ b/test_conformance/compatibility/test_conformance/basic/CMakeLists.txt
@@ -0,0 +1,65 @@
+# Build description for the "basic" compatibility conformance test module.
+# MODULE_NAME is also used as the prefix of the source-list variable below.
+set(MODULE_NAME COMPATIBILITY_BASIC)
+
+# All sources of the module: the test driver (main.c), the individual test files,
+# and the shared harness sources pulled in by relative path from test_common/harness.
+set(${MODULE_NAME}_SOURCES
+    main.c
+    test_fpmath_float.c test_fpmath_float2.c test_fpmath_float4.c
+    test_intmath_int.c test_intmath_int2.c test_intmath_int4.c
+    test_intmath_long.c test_intmath_long2.c test_intmath_long4.c
+    test_hiloeo.c test_local.c test_pointercast.c
+    test_if.c test_loop.c
+    test_readimage.c test_readimage_int16.c test_readimage_fp32.c
+    test_readimage3d.c test_readimage3d_int16.c test_readimage3d_fp32.c
+    test_writeimage.c test_writeimage_int16.c test_writeimage_fp32.c
+    test_multireadimageonefmt.c test_multireadimagemultifmt.c
+    test_imagedim.c
+    test_vloadstore.c
+    test_int2float.c test_float2int.c
+    test_createkernelsinprogram.c
+    test_hostptr.c
+    test_explicit_s2v.cpp
+    test_constant.c
+    test_image_multipass.c
+    test_imagereadwrite.c test_imagereadwrite3d.c
+    test_image_param.c
+    test_imagenpot.c
+    test_image_r8.c
+    test_barrier.c
+    test_basic_parameter_types.c
+    test_arrayreadwrite.c
+    test_arraycopy.c
+    test_imagearraycopy.c
+    test_imagearraycopy3d.c
+    test_imagecopy.c
+    test_imagerandomcopy.c
+    test_arrayimagecopy.c
+    test_arrayimagecopy3d.c
+    test_imagecopy3d.c
+    test_enqueue_map.cpp
+    test_work_item_functions.cpp
+    test_astype.cpp
+    test_async_copy.cpp
+    test_sizeof.c
+    test_vector_creation.cpp
+    test_vec_type_hint.c
+    test_numeric_constants.cpp
+    test_constant_source.cpp
+    test_bufferreadwriterect.c
+    test_async_strided_copy.cpp
+    test_preprocessors.cpp
+    test_kernel_memory_alignment.cpp
+    test_global_work_offsets.cpp
+    test_kernel_call_kernel_function.cpp
+    test_local_kernel_scope.cpp
+    ../../test_common/harness/errorHelpers.c
+    ../../test_common/harness/threadTesting.c
+    ../../test_common/harness/testHarness.c
+    ../../test_common/harness/kernelHelpers.c
+    ../../test_common/harness/typeWrappers.cpp
+    ../../test_common/harness/imageHelpers.cpp
+    ../../test_common/harness/mt19937.c
+    ../../test_common/harness/conversions.c
+    ../../test_common/harness/rounding_mode.c
+    ../../test_common/harness/msvc9.c
+)
+
+# Shared build logic for all test modules.
+# NOTE(review): presumably consumes MODULE_NAME and ${MODULE_NAME}_SOURCES to define the
+# executable target -- CMakeCommon.txt is not visible here; confirm.
+include(../../../CMakeCommon.txt)
--- a/test_conformance/compatibility/test_conformance/basic/Jamfile
+++ b/test_conformance/compatibility/test_conformance/basic/Jamfile
@@ -0,0 +1,75 @@
+# Project-wide requirements: pass -xc++ (gcc) or /TP (msvc) so that every source,
+# including the .c files of this directory, is compiled as C++.
+project
+    : requirements
+      <toolset>gcc:<cflags>-xc++
+      <toolset>msvc:<cflags>"/TP"
+    ;
+
+# Executable for the "basic" conformance tests.
+# The source list must cover every test that main.c registers in basefn_list; the last
+# seven entries were missing here although the CMake and Makefile source lists of this
+# directory already contain them.
+# NOTE(review): the shared harness sources are not listed in this Jamfile; presumably they
+# are provided by a parent Jamfile -- confirm.
+exe test_basic
+    : main.c
+      test_arraycopy.c
+      test_arrayimagecopy3d.c
+      test_arrayimagecopy.c
+      test_arrayreadwrite.c
+      test_astype.cpp
+      test_async_copy.cpp
+      test_barrier.c
+      test_basic_parameter_types.c
+      test_constant.c
+      test_createkernelsinprogram.c
+      test_enqueue_map.cpp
+      test_explicit_s2v.cpp
+      test_float2int.c
+      test_fpmath_float2.c
+      test_fpmath_float4.c
+      test_fpmath_float.c
+      test_hiloeo.c
+      test_hostptr.c
+      test_if.c
+      test_imagearraycopy3d.c
+      test_imagearraycopy.c
+      test_imagecopy3d.c
+      test_imagecopy.c
+      test_imagedim.c
+      test_image_multipass.c
+      test_imagenpot.c
+      test_image_param.c
+      test_image_r8.c
+      test_imagerandomcopy.c
+      test_imagereadwrite3d.c
+      test_imagereadwrite.c
+      test_int2float.c
+      test_intmath_int2.c
+      test_intmath_int4.c
+      test_intmath_int.c
+      test_intmath_long2.c
+      test_intmath_long4.c
+      test_intmath_long.c
+      test_local.c
+      test_loop.c
+      test_multireadimagemultifmt.c
+      test_multireadimageonefmt.c
+      test_pointercast.c
+      test_readimage3d.c
+      test_readimage3d_fp32.c
+      test_readimage3d_int16.c
+      test_readimage.c
+      test_readimage_fp32.c
+      test_readimage_int16.c
+      test_sizeof.c
+      test_vec_type_hint.c
+      test_vector_creation.cpp
+      test_vloadstore.c
+      test_work_item_functions.cpp
+      test_writeimage.c
+      test_writeimage_fp32.c
+      test_writeimage_int16.c
+      test_numeric_constants.cpp
+      test_kernel_call_kernel_function.cpp
+      test_async_strided_copy.cpp
+      test_bufferreadwriterect.c
+      test_constant_source.cpp
+      test_global_work_offsets.cpp
+      test_kernel_memory_alignment.cpp
+      test_local_kernel_scope.cpp
+      test_preprocessors.cpp
+    ;
+    
+# Installs the test_basic executable under $(DIST), in a separate directory
+# for the debug and the release variant.
+install dist
+    : test_basic
+    : <variant>debug:<location>$(DIST)/debug/tests/test_conformance/basic
+      <variant>release:<location>$(DIST)/release/tests/test_conformance/basic
+    ;
+ 
--- a/test_conformance/compatibility/test_conformance/basic/Makefile
+++ b/test_conformance/compatibility/test_conformance/basic/Makefile
@@ -0,0 +1,94 @@
+# Optional ATF support: when BUILD_WITH_ATF is defined, link against the ATF framework
+# and compile with -DUSE_ATF. Both variables stay empty otherwise.
+ifdef BUILD_WITH_ATF
+ATF = -framework ATF
+USE_ATF = -DUSE_ATF
+endif
+
+# Sources of the test_basic executable: the test driver (main.c), the individual test
+# files, and the shared harness sources referenced by relative path.
+SRCS = main.c \
+		test_fpmath_float.c test_fpmath_float2.c test_fpmath_float4.c \
+		test_intmath_int.c test_intmath_int2.c test_intmath_int4.c  \
+		test_intmath_long.c test_intmath_long2.c test_intmath_long4.c \
+		test_hiloeo.c test_local.c test_local_kernel_scope.cpp test_pointercast.c \
+		test_if.c test_sizeof.c test_loop.c \
+		test_readimage.c test_readimage_int16.c test_readimage_fp32.c \
+		test_readimage3d.c test_readimage3d_int16.c test_readimage3d_fp32.c \
+		test_writeimage.c test_writeimage_int16.c test_writeimage_fp32.c \
+		test_multireadimageonefmt.c test_multireadimagemultifmt.c \
+		test_imagedim.c \
+		test_vloadstore.c \
+		test_int2float.c test_float2int.c \
+		test_createkernelsinprogram.c \
+		test_hostptr.c \
+		test_explicit_s2v.cpp \
+		test_constant.c \
+		test_constant_source.cpp \
+		test_image_multipass.c \
+		test_imagereadwrite.c test_imagereadwrite3d.c \
+		test_bufferreadwriterect.c \
+		test_image_param.c \
+		test_imagenpot.c \
+		test_image_r8.c \
+		test_barrier.c \
+		test_arrayreadwrite.c \
+		test_arraycopy.c \
+		test_imagearraycopy.c \
+		test_imagearraycopy3d.c \
+		test_imagecopy.c \
+		test_imagerandomcopy.c \
+		test_arrayimagecopy.c \
+		test_arrayimagecopy3d.c\
+		test_imagecopy3d.c \
+		test_enqueue_map.cpp \
+		test_work_item_functions.cpp \
+		test_astype.cpp \
+		test_async_copy.cpp \
+		test_async_strided_copy.cpp \
+		test_numeric_constants.cpp \
+		test_kernel_call_kernel_function.cpp \
+		test_basic_parameter_types.c \
+		test_vector_creation.cpp \
+		test_vec_type_hint.c \
+		test_preprocessors.cpp \
+		test_kernel_memory_alignment.cpp \
+		test_global_work_offsets.cpp \
+		../../test_common/harness/errorHelpers.c \
+		../../test_common/harness/threadTesting.c \
+		../../test_common/harness/testHarness.c \
+		../../test_common/harness/rounding_mode.c \
+		../../test_common/harness/kernelHelpers.c \
+		../../test_common/harness/typeWrappers.cpp \
+		../../test_common/harness/imageHelpers.cpp \
+                ../../test_common/harness/mt19937.c \
+		../../test_common/harness/conversions.c 
+
+# Extra preprocessor symbols; each word is turned into -D<word> in CFLAGS/CXXFLAGS.
+DEFINES =
+
+# Absolute source paths, so the derived object names are absolute as well.
+SOURCES = $(abspath $(SRCS))
+LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries
+LIBPATH += -L.
+FRAMEWORK = $(SOURCES)
+HEADERS =
+TARGET = test_basic
+INCLUDE =
+COMPILERFLAGS = -c -Wall -g -O0 -Wshorten-64-to-32
+# The C++ driver is used for the link step below.
+CC = c++
+CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
+CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
+LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF}
+
+# Map every .c and .cpp source to its .o object (two passes, one per suffix).
+OBJECTS := ${SOURCES:.c=.o}
+OBJECTS := ${OBJECTS:.cpp=.o}
+
+TARGETOBJECT =
+
+# 'all' and 'clean' are commands, not files: declare them phony so that a stray file
+# with one of these names in this directory cannot prevent the rule from running.
+.PHONY: all clean
+
+all: $(TARGET)
+
+# Link step; the objects themselves are produced by make's built-in rules.
+$(TARGET): $(OBJECTS)
+	$(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES)
+
+clean:
+	rm -f $(TARGET) $(OBJECTS)
+
+# Fallback for any target without a rule: report it instead of failing with make's default error.
+.DEFAULT:
+	@echo The target \"$@\" does not exist in Makefile.
+
+
+
--- a/test_conformance/compatibility/test_conformance/basic/main.c
+++ b/test_conformance/compatibility/test_conformance/basic/main.c
@@ -0,0 +1,263 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../../test_common/harness/compat.h"
+
+#if !defined(_WIN32)
+#include <unistd.h>
+#endif
+
+#include <stdio.h>
+#include <string.h>
+#include "../../test_common/harness/testHarness.h"
+#include "procs.h"
+
+// FIXME: To use certain functions in ../../test_common/harness/imageHelpers.h
+// (for example, generate_random_image_data()), the tests are required to declare
+// the following variables. They are defined here once for the whole test executable,
+// with the device type set to CL_DEVICE_TYPE_DEFAULT and rounding tests switched off.
+cl_device_type gDeviceType = CL_DEVICE_TYPE_DEFAULT;
+bool gTestRounding = false;
+
+// Table of test entry points handed to runTestHarness() by main().
+// Entry i of this table is paired with entry i of basefn_names, so both tables must be
+// kept in the same order; the ct_assert after basefn_names only checks that their
+// lengths match, not the order.
+basefn    basefn_list[] = {
+    test_hostptr,
+    test_fpmath_float,
+    test_fpmath_float2,
+    test_fpmath_float4,
+    test_intmath_int,
+    test_intmath_int2,
+    test_intmath_int4,
+    test_intmath_long,
+    test_intmath_long2,
+    test_intmath_long4,
+    test_hiloeo,
+    test_if,
+    test_sizeof,
+    test_loop,
+    test_pointer_cast,
+    test_local_arg_def,
+    test_local_kernel_def,
+    test_local_kernel_scope,
+    test_constant,
+    test_constant_source,
+    test_readimage,
+    test_readimage_int16,
+    test_readimage_fp32,
+    test_writeimage,
+    test_writeimage_int16,
+    test_writeimage_fp32,
+    test_multireadimageonefmt,
+
+    test_multireadimagemultifmt,
+    test_image_r8,
+    test_barrier,
+    test_int2float,
+    test_float2int,
+    test_imagereadwrite,
+    test_imagereadwrite3d,
+    test_readimage3d,
+    test_readimage3d_int16,
+    test_readimage3d_fp32,
+    test_bufferreadwriterect,
+    test_arrayreadwrite,
+    test_arraycopy,
+    test_imagearraycopy,
+    test_imagearraycopy3d,
+    test_imagecopy,
+    test_imagecopy3d,
+    test_imagerandomcopy,
+    test_arrayimagecopy,
+    test_arrayimagecopy3d,
+    test_imagenpot,
+
+    test_vload_global,
+    test_vload_local,
+    test_vload_constant,
+    test_vload_private,
+    test_vstore_global,
+    test_vstore_local,
+    test_vstore_private,
+
+    test_createkernelsinprogram,
+    test_imagedim_pow2,
+    test_imagedim_non_pow2,
+    test_image_param,
+    test_image_multipass_integer_coord,
+    test_image_multipass_float_coord,
+    test_explicit_s2v_bool,
+    test_explicit_s2v_char,
+    test_explicit_s2v_uchar,
+    test_explicit_s2v_short,
+    test_explicit_s2v_ushort,
+    test_explicit_s2v_int,
+    test_explicit_s2v_uint,
+    test_explicit_s2v_long,
+    test_explicit_s2v_ulong,
+    test_explicit_s2v_float,
+    test_explicit_s2v_double,
+
+    test_enqueue_map_buffer,
+    test_enqueue_map_image,
+
+    test_work_item_functions,
+
+    test_astype,
+
+    test_async_copy_global_to_local,
+    test_async_copy_local_to_global,
+    test_async_strided_copy_global_to_local,
+    test_async_strided_copy_local_to_global,
+    test_prefetch,
+
+    test_kernel_call_kernel_function,
+    test_host_numeric_constants,
+    test_kernel_numeric_constants,
+    test_kernel_limit_constants,
+    test_kernel_preprocessor_macros,
+
+    test_basic_parameter_types,
+    test_vector_creation,
+    test_vec_type_hint,
+    test_kernel_memory_alignment_local,
+    test_kernel_memory_alignment_global,
+    test_kernel_memory_alignment_constant,
+    test_kernel_memory_alignment_private,
+
+    test_global_work_offsets,
+    test_get_global_offset
+};
+
+// Names of the tests, handed to runTestHarness() together with basefn_list.
+// Entry i names the function at basefn_list[i], so the order here must mirror that table.
+// Most names are the function name without its "test_" prefix; "mri_one", "mri_multiple"
+// and "parameter_types" are shortened forms.
+const char    *basefn_names[] = {
+    "hostptr",
+    "fpmath_float",
+    "fpmath_float2",
+    "fpmath_float4",
+    "intmath_int",
+    "intmath_int2",
+    "intmath_int4",
+    "intmath_long",
+    "intmath_long2",
+    "intmath_long4",
+    "hiloeo",
+    "if",
+    "sizeof",
+    "loop",
+    "pointer_cast",
+    "local_arg_def",
+    "local_kernel_def",
+    "local_kernel_scope",
+    "constant",
+    "constant_source",
+    "readimage",
+    "readimage_int16",
+    "readimage_fp32",
+    "writeimage",
+    "writeimage_int16",
+    "writeimage_fp32",
+    "mri_one",
+
+    "mri_multiple",
+    "image_r8",
+    "barrier",
+    "int2float",
+    "float2int",
+    "imagereadwrite",
+    "imagereadwrite3d",
+    "readimage3d",
+    "readimage3d_int16",
+    "readimage3d_fp32",
+    "bufferreadwriterect",
+    "arrayreadwrite",
+    "arraycopy",
+    "imagearraycopy",
+    "imagearraycopy3d",
+    "imagecopy",
+    "imagecopy3d",
+    "imagerandomcopy",
+    "arrayimagecopy",
+    "arrayimagecopy3d",
+    "imagenpot",
+
+    "vload_global",
+    "vload_local",
+    "vload_constant",
+    "vload_private",
+    "vstore_global",
+    "vstore_local",
+    "vstore_private",
+
+    "createkernelsinprogram",
+    "imagedim_pow2",
+    "imagedim_non_pow2",
+    "image_param",
+    "image_multipass_integer_coord",
+    "image_multipass_float_coord",
+    "explicit_s2v_bool",
+    "explicit_s2v_char",
+    "explicit_s2v_uchar",
+    "explicit_s2v_short",
+    "explicit_s2v_ushort",
+    "explicit_s2v_int",
+    "explicit_s2v_uint",
+    "explicit_s2v_long",
+    "explicit_s2v_ulong",
+    "explicit_s2v_float",
+    "explicit_s2v_double",
+
+    "enqueue_map_buffer",
+    "enqueue_map_image",
+
+    "work_item_functions",
+
+    "astype",
+
+    "async_copy_global_to_local",
+    "async_copy_local_to_global",
+    "async_strided_copy_global_to_local",
+    "async_strided_copy_local_to_global",
+    "prefetch",
+
+    "kernel_call_kernel_function",
+    "host_numeric_constants",
+    "kernel_numeric_constants",
+    "kernel_limit_constants",
+    "kernel_preprocessor_macros",
+
+    "parameter_types",
+
+    "vector_creation",
+    "vec_type_hint",
+
+    "kernel_memory_alignment_local",
+    "kernel_memory_alignment_global",
+    "kernel_memory_alignment_constant",
+    "kernel_memory_alignment_private",
+
+    "global_work_offsets",
+    "get_global_offset",
+};
+
+// Compile-time guard: the function table and the name table must have the same number of entries.
+ct_assert((sizeof(basefn_list) / sizeof(basefn_list[0])) == (sizeof(basefn_names) / sizeof(basefn_names[0])));
+
+// Number of registered tests, passed to runTestHarness() by main().
+int    num_fns = sizeof(basefn_names) / sizeof(basefn_names[0]);
+
+
+// Program entry point: forwards the command line and the test tables to the shared
+// harness and returns whatever status the harness reports.
+int main(int argc, const char *argv[])
+{
+    return runTestHarness( argc, argv, num_fns, basefn_list, basefn_names, false, false, 0 );
+}
+
+
+
--- a/test_conformance/compatibility/test_conformance/basic/procs.h
+++ b/test_conformance/compatibility/test_conformance/basic/procs.h
@@ -0,0 +1,142 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../../test_common/harness/kernelHelpers.h"
+#include "../../test_common/harness/testHarness.h"
+#include "../../test_common/harness/errorHelpers.h"
+#include "../../test_common/harness/typeWrappers.h"
+#include "../../test_common/harness/conversions.h"
+#include "../../test_common/harness/rounding_mode.h"
+
+void memset_pattern4(void *dest, const void *src_pattern, size_t bytes);
+// Test entry points: every function below takes a device, a context, a command queue and an element count, and returns int.
+int test_hostptr(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_fpmath_float(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_fpmath_float2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_fpmath_float4(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_intmath_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_intmath_int2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_intmath_int4(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_intmath_long(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_intmath_long2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_intmath_long4(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_hiloeo(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_if(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_sizeof(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_loop(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_pointer_cast(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_local_arg_def(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_local_kernel_def(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_local_kernel_scope(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_constant_source(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_readimage(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_readimage_int16(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_readimage_fp32(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_writeimage(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_writeimage_int16(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_writeimage_fp32(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_multireadimageonefmt(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_multireadimagemultifmt(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_image_r8(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_simplebarrier(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_barrier(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_int2float(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_float2int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_imagearraycopy(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_imagearraycopy3d(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_imagereadwrite(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_imagereadwrite3d(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_readimage3d(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_readimage3d_int16(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_readimage3d_fp32(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_bufferreadwriterect(cl_device_id device, cl_context context, cl_command_queue queue_, int num_elements);
+int test_imagecopy(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_imagecopy3d(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_imagerandomcopy(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_arraycopy(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems);
+int test_arrayimagecopy(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_arrayimagecopy3d(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_imagenpot(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_sampler_float(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_sampler_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_createkernelsinprogram(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_single_large_allocation(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_multiple_max_allocation(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_arrayreadwrite(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_imagedim_pow2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_imagedim_non_pow2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_image_param(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_image_multipass_integer_coord(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_image_multipass_float_coord(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+// vload_* / vstore_* entry points
+int test_vload_global(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_vload_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_vload_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_vload_private(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_vstore_global(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_vstore_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_vstore_private(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+// explicit_s2v_* entry points, one per element type
+int test_explicit_s2v_bool(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_explicit_s2v_char(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_explicit_s2v_uchar(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_explicit_s2v_short(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_explicit_s2v_ushort(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_explicit_s2v_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_explicit_s2v_uint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_explicit_s2v_long(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_explicit_s2v_ulong(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_explicit_s2v_float(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_explicit_s2v_double(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+// enqueue_map_* entry points
+int test_enqueue_map_buffer(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_enqueue_map_image(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+
+int test_work_item_functions(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+
+int test_astype(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+
+int test_native_kernel(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+// async_copy / async_strided_copy / prefetch entry points
+int test_async_copy_global_to_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_async_copy_local_to_global(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_async_strided_copy_global_to_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_async_strided_copy_local_to_global(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_prefetch(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+// host / kernel constant and macro entry points
+int test_host_numeric_constants(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_kernel_numeric_constants(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_kernel_limit_constants(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+
+int test_kernel_preprocessor_macros(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+
+int test_kernel_call_kernel_function(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+
+int test_basic_parameter_types(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
+int test_vector_creation(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+int test_vec_type_hint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+
+// kernel_memory_alignment_* entry points, one per address space name
+int test_kernel_memory_alignment_local(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems);
+int test_kernel_memory_alignment_global(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems);
+int test_kernel_memory_alignment_constant(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems);
+int test_kernel_memory_alignment_private(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems);
+// global work offset entry points
+int test_global_work_offsets(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems);
+int test_get_global_offset(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems);
+
+
+
--- a/test_conformance/compatibility/test_conformance/basic/run_array
+++ b/test_conformance/compatibility/test_conformance/basic/run_array
@@ -0,0 +1,3 @@
+#!/bin/sh
+cd "$(dirname "$0")" || exit 1  # run from the directory that contains this script; stop if it cannot be entered
+./test_basic arrayreadwrite arraycopy bufferreadwriterect "$@"  # extra arguments are forwarded unchanged
--- a/test_conformance/compatibility/test_conformance/basic/run_array_image_copy
+++ b/test_conformance/compatibility/test_conformance/basic/run_array_image_copy
@@ -0,0 +1,3 @@
+#!/bin/sh
+cd "$(dirname "$0")" || exit 1  # run from the directory that contains this script; stop if it cannot be entered
+./test_basic arrayimagecopy arrayimagecopy3d imagearraycopy "$@"  # extra arguments are forwarded unchanged
--- a/test_conformance/compatibility/test_conformance/basic/run_image
+++ b/test_conformance/compatibility/test_conformance/basic/run_image
@@ -0,0 +1,17 @@
+#!/bin/sh
+cd "$(dirname "$0")" || exit 1  # run from the directory that contains this script; stop if it cannot be entered
+./test_basic  \
+imagecopy imagerandomcopy \
+imagearraycopy imagearraycopy3d \
+image_r8 \
+readimage readimage_int16 readimage_fp32 \
+writeimage writeimage_int16 writeimage_fp32 \
+imagenpot \
+image_param \
+image_multipass_integer_coord \
+readimage3d \
+readimage3d_int16 \
+readimage3d_fp32 \
+imagereadwrite3d \
+imagereadwrite \
+"$@"  # extra arguments are forwarded unchanged
--- a/test_conformance/compatibility/test_conformance/basic/run_multi_read_image
+++ b/test_conformance/compatibility/test_conformance/basic/run_multi_read_image
@@ -0,0 +1,4 @@
+#!/bin/sh
+cd "$(dirname "$0")" || exit 1  # run from the directory that contains this script; stop if it cannot be entered
+./test_basic mri_one mri_multiple "$@"  # extra arguments are forwarded unchanged
+
--- a/test_conformance/compatibility/test_conformance/basic/test_arraycopy.c
+++ b/test_conformance/compatibility/test_conformance/basic/test_arraycopy.c
@@ -0,0 +1,201 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../../test_common/harness/compat.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+
+#include "procs.h"
+
+const char *copy_kernel_code =  // OpenCL C source of kernel test_copy: work-item tid = get_global_id(0) copies src[tid] to dst[tid]
+"__kernel void test_copy(__global unsigned int *src, __global unsigned int *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = src[tid];\n"
+"}\n";
+
+// Exercises three ways of filling the "results" buffer and compares it with the host data after each one:
+// clEnqueueCopyBuffer from a CL_MEM_USE_HOST_PTR buffer, clEnqueueWriteBuffer + clEnqueueCopyBuffer, and a copy kernel.
+// Returns 0 if every phase matched, otherwise the accumulated mismatch count; -1 if host memory cannot be allocated.
+int
+test_arraycopy(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    cl_uint     *input_ptr, *output_ptr;
+    cl_mem      streams[4], results;
+    cl_program  program;
+    cl_kernel   kernel;
+    unsigned    num_elements = 128 * 1024;
+    cl_uint     num_copies = 1;
+    size_t      delta_offset;
+    unsigned    i;
+    cl_int      err;
+    MTdata      d;
+    int         error_count = 0;
+
+    input_ptr = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
+    output_ptr = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
+    if (input_ptr == NULL || output_ptr == NULL)
+    {
+        // Both host arrays are written and compared below; stop before either is dereferenced.
+        log_error("Failed to allocate host memory for test_arraycopy\n");
+        free(input_ptr);
+        free(output_ptr);
+        return -1;
+    }
+
+    // results
+    results = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_uint) * num_elements, NULL, &err);
+    test_error(err, "clCreateBuffer failed");
+
+/*****************************************************************************************************************************************/
+#pragma mark client backing
+    log_info("Testing CL_MEM_USE_HOST_PTR buffer with clEnqueueCopyBuffer\n");
+    // randomize data
+    d = init_genrand( gRandomSeed );
+    for (i=0; i<num_elements; i++)
+        input_ptr[i] = (cl_uint)(genrand_int32(d) & 0x7FFFFFFF);
+
+    // client backing
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_USE_HOST_PTR), sizeof(cl_uint) * num_elements, input_ptr, &err);
+    test_error(err, "clCreateBuffer failed");
+
+    delta_offset = num_elements * sizeof(cl_uint) / num_copies;
+    for (i=0; i<num_copies; i++)
+    {
+        size_t    offset = i * delta_offset;
+        err = clEnqueueCopyBuffer(queue, streams[0], results, offset, offset, delta_offset, 0, NULL, NULL);
+        test_error(err, "clEnqueueCopyBuffer failed");
+    }
+
+    // Blocking read of the copied data so it can be compared on the host.
+    err = clEnqueueReadBuffer( queue, results, CL_TRUE, 0, num_elements*sizeof(cl_uint), output_ptr, 0, NULL, NULL );
+    test_error(err, "clEnqueueReadBuffer failed");
+
+    // This phase has no early exit: every mismatching element adds to error_count.
+    for (i=0; i<num_elements; i++)
+    {
+        if (input_ptr[i] != output_ptr[i])
+        {
+            err = -1;
+            error_count++;
+        }
+    }
+
+    if (err)
+        log_error("\tCL_MEM_USE_HOST_PTR buffer with clEnqueueCopyBuffer FAILED\n");
+    else
+        log_info("\tCL_MEM_USE_HOST_PTR buffer with clEnqueueCopyBuffer passed\n");
+
+#pragma mark framework backing (no client data)
+    log_info("Testing with clEnqueueWriteBuffer and clEnqueueCopyBuffer\n");
+    // randomize data
+    for (i=0; i<num_elements; i++)
+        input_ptr[i] = (cl_uint)(genrand_int32(d) & 0x7FFFFFFF);
+
+    // no backing
+    streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE) , sizeof(cl_uint) * num_elements, NULL, &err);
+    test_error(err, "clCreateBuffer failed");
+
+    for (i=0; i<num_copies; i++)
+    {
+        size_t    offset = i * delta_offset;
+        // Copy the array up from host ptr
+        err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, sizeof(cl_uint)*num_elements, input_ptr, 0, NULL, NULL);
+        test_error(err, "clEnqueueWriteBuffer failed");
+        err = clEnqueueCopyBuffer(queue, streams[2], results, offset, offset, delta_offset, 0, NULL, NULL);
+        test_error(err, "clEnqueueCopyBuffer failed");
+    }
+
+    err = clEnqueueReadBuffer( queue, results, CL_TRUE, 0, num_elements*sizeof(cl_uint), output_ptr, 0, NULL, NULL );
+    test_error(err, "clEnqueueReadBuffer failed");
+
+    for (i=0; i<num_elements; i++)
+    {
+        if (input_ptr[i] != output_ptr[i])
+        {
+            err = -1;
+            error_count++;
+            break;
+        }
+    }
+
+    if (err)
+        log_error("\tclEnqueueWriteBuffer and clEnqueueCopyBuffer FAILED\n");
+    else
+        log_info("\tclEnqueueWriteBuffer and clEnqueueCopyBuffer passed\n");
+
+/*****************************************************************************************************************************************/
+#pragma mark kernel copy test
+    log_info("Testing CL_MEM_USE_HOST_PTR buffer with kernel copy\n");
+    // randomize data
+    for (i=0; i<num_elements; i++)
+        input_ptr[i] = (cl_uint)(genrand_int32(d) & 0x7FFFFFFF);
+    free_mtdata(d); d= NULL;
+
+    // client backing
+    streams[3] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_USE_HOST_PTR), sizeof(cl_uint) * num_elements, input_ptr, &err);
+    test_error(err, "clCreateBuffer failed");
+
+    err = create_single_kernel_helper(context, &program, &kernel, 1, &copy_kernel_code, "test_copy" );
+    test_error(err, "create_single_kernel_helper failed");
+
+    err = clSetKernelArg(kernel, 0, sizeof streams[3], &streams[3]);
+    err |= clSetKernelArg(kernel, 1, sizeof results, &results);
+    test_error(err, "clSetKernelArg failed");
+
+    size_t threads[3] = {num_elements, 0, 0};
+    err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
+    test_error(err, "clEnqueueNDRangeKernel failed");
+    err = clEnqueueReadBuffer( queue, results, CL_TRUE, 0, num_elements*sizeof(cl_uint), output_ptr, 0, NULL, NULL );
+    test_error(err, "clEnqueueReadBuffer failed");
+
+    for (i=0; i<num_elements; i++)
+    {
+        if (input_ptr[i] != output_ptr[i])
+        {
+            err = -1;
+            error_count++;
+            break;
+        }
+    }
+
+    // Log this phase from its own result, before err is replaced by the total across all phases.
+    if (err)
+        log_error("\tCL_MEM_USE_HOST_PTR buffer with kernel copy FAILED\n");
+    else
+        log_info("\tCL_MEM_USE_HOST_PTR buffer with kernel copy passed\n");
+
+    // Keep track of multiple errors.
+    if (error_count != 0)
+        err = error_count;
+    clReleaseProgram(program);
+    clReleaseKernel(kernel);
+    clReleaseMemObject(results);
+    clReleaseMemObject(streams[0]);
+    clReleaseMemObject(streams[2]);
+    clReleaseMemObject(streams[3]);
+    free(input_ptr);
+    free(output_ptr);
+
+    return err;
+}
+
+
+
--- a/test_conformance/compatibility/test_conformance/basic/test_arrayimagecopy.c
+++ b/test_conformance/compatibility/test_conformance/basic/test_arrayimagecopy.c
@@ -0,0 +1,143 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../../test_common/harness/compat.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "procs.h"
+
+// Fills a buffer with random bytes, copies it into a 512x512 2D image of the given format with
+// clEnqueueCopyBufferToImage, reads the image back and compares it byte-for-byte with the source data.
+// Returns 0 when the data comes back unchanged, non-zero otherwise.
+int test_arrayimagecopy_single_format(cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format)
+{
+  cl_uchar    *bufptr, *imgptr;
+  clMemWrapper      buffer, image;
+  int        img_width = 512;
+  int        img_height = 512;
+  size_t    elem_size, buffer_size;
+  int        i, j;
+  cl_int          err;
+  MTdata          d;
+  cl_event  copyevent;
+
+  log_info("Testing %s %s\n", GetChannelOrderName(format->image_channel_order), GetChannelTypeName(format->image_channel_data_type));
+
+  image = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), format, img_width, img_height, 0, NULL, &err);
+  test_error(err, "create_image_2d failed");
+
+  err = clGetImageInfo(image, CL_IMAGE_ELEMENT_SIZE, sizeof(size_t), &elem_size, NULL);
+  test_error(err, "clGetImageInfo failed");
+
+  buffer_size = sizeof(cl_uchar) * elem_size * img_width * img_height;
+
+  buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  buffer_size, NULL, &err);
+  test_error(err, "clCreateBuffer failed");
+
+  // Host arrays: bufptr holds the source data, imgptr receives what is read back from the image.
+  bufptr = (cl_uchar*)malloc(buffer_size);
+  imgptr = (cl_uchar*)malloc(buffer_size);
+  if (bufptr == NULL || imgptr == NULL) {
+    log_error("ERROR: Unable to allocate host memory\n");
+    free(bufptr); free(imgptr);
+    return -1;
+  }
+
+  d = init_genrand( gRandomSeed );
+  for (i=0; i<(int)buffer_size; i++) {
+     bufptr[i] = (cl_uchar)genrand_int32(d);
+  }
+  free_mtdata(d); d = NULL;
+
+  size_t origin[3]={0,0,0}, region[3]={(size_t)img_width,(size_t)img_height,1};
+  err = clEnqueueWriteBuffer( queue, buffer, CL_TRUE, 0, buffer_size, bufptr, 0, NULL, NULL);
+  test_error(err, "clEnqueueWriteBuffer failed");
+
+  err = clEnqueueCopyBufferToImage( queue, buffer, image, 0, origin, region, 0, NULL, &copyevent );
+  test_error(err, "clEnqueueCopyBufferToImage failed");
+
+  // The blocking read waits on the copy event; release the event right after the call so it is not leaked.
+  err = clEnqueueReadImage( queue, image, CL_TRUE, origin, region, 0, 0, imgptr, 1, &copyevent, NULL );
+  clReleaseEvent(copyevent);
+  test_error(err, "clEnqueueReadImage failed");
+
+  if (memcmp(bufptr, imgptr, buffer_size) != 0) {
+    log_error( "ERROR: Results did not validate!\n" );
+    int failuresPrinted = 0;
+    for (i=0; i< (int)buffer_size; i+=(int)elem_size) {
+        // Report whole elements: "actual" is what the image returned, "expected" is what was written.
+        if (memcmp(bufptr + i, imgptr + i, elem_size) != 0) {
+            char values[4096];
+            values[0] = 0;
+            sprintf(values + strlen(values), "%d(0x%x) -> actual [", i, i);
+            for (j=0; j<(int)elem_size; j++)
+                sprintf(values + strlen( values), "0x%02x ", imgptr[i+j]);
+            sprintf(values + strlen(values), "] != expected [");
+            for (j=0; j<(int)elem_size; j++)
+                sprintf(values + strlen( values), "0x%02x ", bufptr[i+j]);
+            sprintf(values + strlen(values), "]");
+            log_error("%s\n", values);
+            failuresPrinted++;
+        }
+        if (failuresPrinted > 5) {
+            log_error("Not printing further failures...\n");
+            break;
+        }
+    }
+    err = -1;
+  }
+
+  free(bufptr);
+  free(imgptr);
+
+  if (err)
+    log_error("ARRAY to IMAGE copy test failed for image_channel_order=0x%lx and image_channel_data_type=0x%lx\n",
+              (unsigned long)format->image_channel_order, (unsigned long)format->image_channel_data_type);
+
+  return err;
+}
+
+// Runs test_arrayimagecopy_single_format on every CL_MEM_READ_WRITE 2D image format the context reports; returns the OR of the results (0 = pass).
+int test_arrayimagecopy(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+{
+  cl_int          err;
+  cl_image_format *formats;
+  cl_uint         num_formats, i;
+
+  PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
+  err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, NULL, &num_formats);
+  test_error(err, "clGetSupportedImageFormats failed");
+
+  formats = (cl_image_format *)malloc(num_formats * sizeof(cl_image_format));
+  if (formats == NULL && num_formats != 0) { log_error("ERROR: Unable to allocate the image format list\n"); return -1; }
+  err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, num_formats, formats, NULL);
+  if (err != CL_SUCCESS) { free(formats); formats = NULL; num_formats = 0; }  // drop the list before the failure is reported
+  test_error(err, "clGetSupportedImageFormats failed");
+
+  for (i = 0; i < num_formats; i++)
+    err |= test_arrayimagecopy_single_format(device, context, queue, &formats[i]);
+  free(formats);  // the format list is only needed while iterating
+
+  if (err)
+    log_error("ARRAY to IMAGE copy test failed\n");
+  else
+    log_info("ARRAY to IMAGE copy test passed\n");
+
+  return err;
+}
--- a/test_conformance/compatibility/test_conformance/basic/test_arrayimagecopy3d.c
+++ b/test_conformance/compatibility/test_conformance/basic/test_arrayimagecopy3d.c
@@ -0,0 +1,144 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../../test_common/harness/compat.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "procs.h"
+
+// Fills a buffer with random bytes, copies it into a 128x128x32 3D image of the given format with
+// clEnqueueCopyBufferToImage, reads the image back and compares it byte-for-byte with the source data.
+// Returns 0 when the data comes back unchanged, non-zero otherwise.
+int test_arrayimagecopy3d_single_format(cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format)
+{
+  cl_uchar    *bufptr, *imgptr;
+  clMemWrapper      buffer, image;
+  int        img_width = 128;
+  int        img_height = 128;
+  int        img_depth = 32;
+  size_t    elem_size, buffer_size;
+  int        i, j;
+  cl_int          err;
+  MTdata          d;
+  cl_event  copyevent;
+
+  log_info("Testing %s %s\n", GetChannelOrderName(format->image_channel_order), GetChannelTypeName(format->image_channel_data_type));
+
+  image = create_image_3d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), format, img_width, img_height, img_depth, 0, 0, NULL, &err);
+  test_error(err, "create_image_3d failed");
+
+  err = clGetImageInfo(image, CL_IMAGE_ELEMENT_SIZE, sizeof(size_t), &elem_size, NULL);
+  test_error(err, "clGetImageInfo failed");
+
+  buffer_size = sizeof(cl_uchar) * elem_size * img_width * img_height * img_depth;
+
+  buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  buffer_size, NULL, &err);
+  test_error(err, "clCreateBuffer failed");
+
+  // Host arrays: bufptr holds the source data, imgptr receives what is read back from the image.
+  bufptr = (cl_uchar*)malloc(buffer_size);
+  imgptr = (cl_uchar*)malloc(buffer_size);
+  if (bufptr == NULL || imgptr == NULL) {
+    log_error("ERROR: Unable to allocate host memory\n");
+    free(bufptr); free(imgptr);
+    return -1;
+  }
+
+  d = init_genrand( gRandomSeed );
+  for (i=0; i<(int)buffer_size; i++) {
+     bufptr[i] = (cl_uchar)genrand_int32(d);
+  }
+  free_mtdata(d); d = NULL;
+
+  size_t origin[3]={0,0,0}, region[3]={(size_t)img_width,(size_t)img_height,(size_t)img_depth};
+  err = clEnqueueWriteBuffer( queue, buffer, CL_TRUE, 0, buffer_size, bufptr, 0, NULL, NULL);
+  test_error(err, "clEnqueueWriteBuffer failed");
+
+  err = clEnqueueCopyBufferToImage( queue, buffer, image, 0, origin, region, 0, NULL, &copyevent );
+  test_error(err, "clEnqueueCopyBufferToImage failed");
+
+  // The blocking read waits on the copy event; release the event right after the call so it is not leaked.
+  err = clEnqueueReadImage( queue, image, CL_TRUE, origin, region, 0, 0, imgptr, 1, &copyevent, NULL );
+  clReleaseEvent(copyevent);
+  test_error(err, "clEnqueueReadImage failed");
+
+  if (memcmp(bufptr, imgptr, buffer_size) != 0) {
+    log_error( "ERROR: Results did not validate!\n" );
+    int failuresPrinted = 0;
+    for (i=0; i< (int)buffer_size; i+=(int)elem_size) {
+        // Report whole elements: "actual" is what the image returned, "expected" is what was written.
+        if (memcmp(bufptr + i, imgptr + i, elem_size) != 0) {
+            char values[4096];
+            values[0] = 0;
+            sprintf(values + strlen(values), "%d(0x%x) -> actual [", i, i);
+            for (j=0; j<(int)elem_size; j++)
+                sprintf(values + strlen( values), "0x%02x ", imgptr[i+j]);
+            sprintf(values + strlen(values), "] != expected [");
+            for (j=0; j<(int)elem_size; j++)
+                sprintf(values + strlen( values), "0x%02x ", bufptr[i+j]);
+            sprintf(values + strlen(values), "]");
+            log_error("%s\n", values);
+            failuresPrinted++;
+        }
+        if (failuresPrinted > 5) {
+            log_error("Not printing further failures...\n");
+            break;
+        }
+    }
+    err = -1;
+  }
+
+  free(bufptr);
+  free(imgptr);
+
+  if (err)
+    log_error("ARRAY to IMAGE3D copy test failed for image_channel_order=0x%lx and image_channel_data_type=0x%lx\n",
+              (unsigned long)format->image_channel_order, (unsigned long)format->image_channel_data_type);
+
+  return err;
+}
+
+// Runs test_arrayimagecopy3d_single_format on every CL_MEM_READ_WRITE 3D image format the context reports; returns the OR of the results (0 = pass).
+int test_arrayimagecopy3d(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+{
+  cl_int          err;
+  cl_image_format *formats;
+  cl_uint         num_formats, i;
+
+  PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( device )
+  err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE3D, 0, NULL, &num_formats);
+  test_error(err, "clGetSupportedImageFormats failed");
+
+  formats = (cl_image_format *)malloc(num_formats * sizeof(cl_image_format));
+  if (formats == NULL && num_formats != 0) { log_error("ERROR: Unable to allocate the image format list\n"); return -1; }
+  err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE3D, num_formats, formats, NULL);
+  if (err != CL_SUCCESS) { free(formats); formats = NULL; num_formats = 0; }  // drop the list before the failure is reported
+  test_error(err, "clGetSupportedImageFormats failed");
+
+  for (i = 0; i < num_formats; i++)
+    err |= test_arrayimagecopy3d_single_format(device, context, queue, &formats[i]);
+  free(formats);  // the format list is only needed while iterating
+
+  if (err)
+    log_error("ARRAY to IMAGE3D copy test failed\n");
+  else
+    log_info("ARRAY to IMAGE3D copy test passed\n");
+
+  return err;
+}
--- a/test_conformance/compatibility/test_conformance/basic/test_arrayreadwrite.c
+++ b/test_conformance/compatibility/test_conformance/basic/test_arrayreadwrite.c
@@ -0,0 +1,94 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../../test_common/harness/compat.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+
+#include "procs.h"
+
+
+// Performs num_tries blocking write/read round trips on randomly chosen
+// sub-ranges of the buffer and compares what was read back with what was
+// written.
+//
+// inptr holds the reference data and outptr receives the read-back data; both
+// must hold num_elements cl_uint values. d is the random generator state used
+// to pick each range.
+//
+// Returns 0 on success, -1 on a data mismatch, or the OpenCL error code of a
+// failing enqueue. It releases nothing itself, so the caller can clean up on
+// every return path.
+static int
+arrayreadwrite_run_tries(cl_command_queue queue, cl_mem stream, const cl_uint *inptr, cl_uint *outptr,
+                         int num_elements, int num_tries, MTdata d)
+{
+    int i, j;
+    int err = 0;
+
+    for (i=0; i<num_tries; i++)
+    {
+        int        offset;
+        int        cb;
+
+        // Draw until the starting element lies in [1, num_elements - 1].
+        do {
+            offset = (int)(genrand_int32(d) & 0x7FFFFFFF);
+        } while (offset <= 0 || offset >= num_elements);
+
+        // Random element count, clamped so the range stays inside the buffer.
+        cb = (int)(genrand_int32(d) & 0x7FFFFFFF);
+        if (cb > (num_elements - offset))
+            cb = num_elements - offset;
+        // A random draw of 0 would request a zero-size transfer, which OpenCL
+        // versions before 2.1 may reject with CL_INVALID_VALUE; always move
+        // at least one element instead.
+        if (cb == 0)
+            cb = 1;
+
+        err = clEnqueueWriteBuffer(queue, stream, CL_TRUE, offset*sizeof(cl_uint), sizeof(cl_uint)*cb,&inptr[offset], 0, NULL, NULL);
+        test_error(err, "clEnqueueWriteBuffer failed");
+
+        err = clEnqueueReadBuffer( queue, stream, CL_TRUE, offset*sizeof(cl_uint), cb*sizeof(cl_uint), &outptr[offset], 0, NULL, NULL );
+        test_error(err, "clEnqueueReadBuffer failed");
+
+        for (j=offset; j<offset+cb; j++)
+        {
+            if (inptr[j] != outptr[j])
+            {
+                log_error("ARRAY read, write test failed\n");
+                err = -1;
+                break;
+            }
+        }
+
+        if (err)
+            break;
+    }
+
+    return err;
+}
+
+// Buffer read/write test: fills a host array with random values, then checks
+// that random sub-ranges written to a device buffer read back unchanged.
+//
+// The incoming num_elements value is ignored; the test always uses
+// 4 * 1024 * 1024 elements. device is not used.
+//
+// Returns 0 on success, -1 on a data mismatch or host allocation failure, or
+// the OpenCL error code of the call that failed.
+int
+test_arrayreadwrite(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+{
+    cl_uint             *inptr, *outptr;
+    cl_mem              streams[1];
+    int                 num_tries = 400;
+    int                 i, err;
+    MTdata              d;
+
+    num_elements = 1024 * 1024 * 4;
+
+    inptr = (cl_uint*)malloc(num_elements*sizeof(cl_uint));
+    outptr = (cl_uint*)malloc(num_elements*sizeof(cl_uint));
+    if (inptr == NULL || outptr == NULL)
+    {
+        log_error("Unable to allocate host memory for the ARRAY read, write test\n");
+        free(inptr);
+        free(outptr);
+        return -1;
+    }
+
+    // randomize data
+    d = init_genrand( gRandomSeed );
+    for (i=0; i<num_elements; i++)
+        inptr[i] = (cl_uint)(genrand_int32(d) & 0x7FFFFFFF);
+
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_uint) * num_elements, NULL, &err);
+    if (err)
+    {
+        // test_error returns on failure, so release the host resources first.
+        free_mtdata(d);
+        free(inptr);
+        free(outptr);
+    }
+    test_error(err, "clCreateBuffer failed");
+
+    err = arrayreadwrite_run_tries(queue, streams[0], inptr, outptr, num_elements, num_tries, d);
+
+    free_mtdata(d);
+    clReleaseMemObject(streams[0]);
+    free(inptr);
+    free(outptr);
+
+    if (!err)
+        log_info("ARRAY read, write test passed\n");
+
+    return err;
+}
+
+
+
--- a/test_conformance/compatibility/test_conformance/basic/test_astype.cpp
+++ b/test_conformance/compatibility/test_conformance/basic/test_astype.cpp
@@ -0,0 +1,289 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../../test_common/harness/compat.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+
+#include "procs.h"
+#include "../../test_common/harness/conversions.h"
+#include "../../test_common/harness/typeWrappers.h"
+
+
+// Generic as_type kernel template: each work-item reads src[ tid ],
+// reinterprets it with as_<type><size>() and stores the result to dst[ tid ].
+// sprintf arguments, in order: the optional pragma line, the source type name
+// and size suffix, then the destination type name and size suffix three times
+// (dst pointer, tmp declaration, as_ conversion).
+static const char *astype_kernel_pattern =
+"%s\n"
+"__kernel void test_fn( __global %s%s *src, __global %s%s *dst )\n"
+"{\n"
+"    int tid = get_global_id( 0 );\n"
+"    %s%s tmp = as_%s%s( src[ tid ] );\n"
+"   dst[ tid ] = tmp;\n"
+"}\n";
+
+// Variant used when both the source and the destination are 3-element
+// vectors: src and dst are declared as pointers to the scalar element type
+// and the data is moved with vload3/vstore3.
+static const char *astype_kernel_pattern_V3srcV3dst =
+"%s\n"
+"__kernel void test_fn( __global %s *src, __global %s *dst )\n"
+"{\n"
+"    int tid = get_global_id( 0 );\n"
+"    %s%s tmp = as_%s%s( vload3(tid,src) );\n"
+"   vstore3(tmp,tid,dst);\n"
+"}\n";
+// Compared with the sprintf call for astype_kernel_pattern, this template
+// omits the third and fifth arguments (the size suffixes of the src and dst
+// pointers, each of which would be "3").
+
+// Variant used when only the destination is a 3-element vector: src keeps its
+// vector pointer type, while dst is a pointer to the scalar element type that
+// is written with vstore3. The "3" suffix of tmp and as_ is hard-coded in the
+// template.
+static const char *astype_kernel_pattern_V3dst =
+"%s\n"
+"__kernel void test_fn( __global %s%s *src, __global %s *dst )\n"
+"{\n"
+"    int tid = get_global_id( 0 );\n"
+"    %s3 tmp = as_%s3( src[ tid ] );\n"
+"   vstore3(tmp,tid,dst);\n"
+"}\n";
+// Compared with the sprintf call for astype_kernel_pattern, this template
+// omits the fifth argument (the size suffix of the dst pointer, which would
+// be "3").
+
+
+// Variant used when only the source is a 3-element vector: src is a pointer
+// to the scalar element type that is read with vload3, while dst keeps its
+// vector pointer type and is written by plain indexing.
+static const char *astype_kernel_pattern_V3src =
+"%s\n"
+"__kernel void test_fn( __global %s *src, __global %s%s *dst )\n"
+"{\n"
+"    int tid = get_global_id( 0 );\n"
+"    %s%s tmp = as_%s%s( vload3(tid,src) );\n"
+"   dst[ tid ] = tmp;\n"
+"}\n";
+// Compared with the sprintf call for astype_kernel_pattern, this template
+// omits the third argument (the size suffix of the src pointer, which would
+// be "3").
+
+
+namespace {
+// Owns one malloc'd host buffer and frees it when it goes out of scope, so the
+// early returns hidden inside test_error() cannot leak it.
+struct AstypeHostBuffer
+{
+    char *ptr;
+    explicit AstypeHostBuffer( size_t size ) : ptr( (char*)malloc( size ) ) {}
+    ~AstypeHostBuffer() { free( ptr ); }
+private:
+    // Not copyable: two owners would free the same pointer twice.
+    AstypeHostBuffer( const AstypeHostBuffer & );
+    AstypeHostBuffer &operator=( const AstypeHostBuffer & );
+};
+}
+
+// Builds and runs one as_<outVecType><outVecSize>( <inVecType><vecSize> )
+// kernel over numElements random input vectors and, where the result is well
+// defined, checks that the output bytes equal the input bytes.
+//
+// vecSize and outVecSize must be one of 1, 2, 3, 4, 8 or 16 (they index
+// sizeNames). 3-element vectors are read/written with vload3/vstore3.
+//
+// Returns 0 on success or when the combination is only required to compile
+// and run, 1 on a data mismatch, -1 when host memory cannot be allocated, or
+// the OpenCL error code of the call that failed.
+int test_astype_set( cl_device_id device, cl_context context, cl_command_queue queue, ExplicitType inVecType, ExplicitType outVecType,
+                    unsigned int vecSize, unsigned int outVecSize,
+                    int numElements )
+{
+    int error;
+
+    clProgramWrapper program;
+    clKernelWrapper kernel;
+    clMemWrapper streams[ 2 ];
+
+    char programSrc[ 10240 ];
+    size_t threads[ 1 ], localThreads[ 1 ];
+    size_t typeSize = get_explicit_type_size( inVecType );
+    size_t outTypeSize = get_explicit_type_size(outVecType);
+    // Maps a vector width to the suffix used in the OpenCL C type name.
+    char sizeNames[][ 3 ] = { "", "", "2", "3", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" };
+    MTdata d;
+
+
+
+    // Create program
+    if(outVecSize == 3 && vecSize == 3) {
+        // astype_kernel_pattern_V3srcV3dst
+        sprintf( programSrc, astype_kernel_pattern_V3srcV3dst,
+                (outVecType == kDouble || inVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
+                get_explicit_type_name( inVecType ), // sizeNames[ vecSize ],
+                get_explicit_type_name( outVecType ), // sizeNames[ outVecSize ],
+                get_explicit_type_name( outVecType ), sizeNames[ outVecSize ],
+                get_explicit_type_name( outVecType ), sizeNames[ outVecSize ] );
+    } else if(outVecSize == 3) {
+        // astype_kernel_pattern_V3dst
+        sprintf( programSrc, astype_kernel_pattern_V3dst,
+                (outVecType == kDouble || inVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
+                get_explicit_type_name( inVecType ), sizeNames[ vecSize ],
+                get_explicit_type_name( outVecType ),
+                get_explicit_type_name( outVecType ),
+                get_explicit_type_name( outVecType ));
+
+    } else if(vecSize == 3) {
+        // astype_kernel_pattern_V3src
+        sprintf( programSrc, astype_kernel_pattern_V3src,
+                (outVecType == kDouble || inVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
+                get_explicit_type_name( inVecType ),// sizeNames[ vecSize ],
+                get_explicit_type_name( outVecType ), sizeNames[ outVecSize ],
+                get_explicit_type_name( outVecType ), sizeNames[ outVecSize ],
+                get_explicit_type_name( outVecType ), sizeNames[ outVecSize ]);
+    } else {
+        sprintf( programSrc, astype_kernel_pattern,
+                (outVecType == kDouble || inVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
+                get_explicit_type_name( inVecType ), sizeNames[ vecSize ],
+                get_explicit_type_name( outVecType ), sizeNames[ outVecSize ],
+                get_explicit_type_name( outVecType ), sizeNames[ outVecSize ],
+                get_explicit_type_name( outVecType ), sizeNames[ outVecSize ]);
+    }
+
+    const char *ptr = programSrc;
+    error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "test_fn" );
+    test_error( error, "Unable to create testing kernel" );
+
+
+    // Create some input values. The host buffers are owned by scoped holders
+    // so every return below releases them.
+    size_t inBufferSize = sizeof(char)* numElements * get_explicit_type_size( inVecType ) * vecSize;
+    AstypeHostBuffer inStorage( inBufferSize );
+    size_t outBufferSize = sizeof(char)* numElements * get_explicit_type_size( outVecType ) *outVecSize;
+    AstypeHostBuffer outStorage( outBufferSize );
+    char *inBuffer = inStorage.ptr;
+    char *outBuffer = outStorage.ptr;
+    if( inBuffer == NULL || outBuffer == NULL )
+    {
+        log_error( "ERROR: Unable to allocate host buffers for the as_type test\n" );
+        return -1;
+    }
+
+    d = init_genrand( gRandomSeed );
+    generate_random_data( inVecType, numElements * vecSize,
+                         d, inBuffer );
+    free_mtdata(d); d = NULL;
+
+    // Create I/O streams and set arguments
+    streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, inBufferSize, inBuffer, &error );
+    test_error( error, "Unable to create I/O stream" );
+    streams[ 1 ] = clCreateBuffer( context, CL_MEM_READ_WRITE, outBufferSize, NULL, &error );
+    test_error( error, "Unable to create I/O stream" );
+
+    error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] );
+    test_error( error, "Unable to set kernel argument" );
+    error = clSetKernelArg( kernel, 1, sizeof( streams[ 1 ] ), &streams[ 1 ] );
+    test_error( error, "Unable to set kernel argument" );
+
+
+    // Run the kernel
+    threads[ 0 ] = numElements;
+    error = get_max_common_work_group_size( context, kernel, threads[ 0 ], &localThreads[ 0 ] );
+    test_error( error, "Unable to get group size to run with" );
+
+    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
+    test_error( error, "Unable to run kernel" );
+
+
+    // Get the results and compare
+    // The beauty is that astype is supposed to return the bit pattern as a different type, which means
+    // the output should have the exact same bit pattern as the input. No interpretation necessary!
+    error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, outBufferSize, outBuffer, 0, NULL, NULL );
+    test_error( error, "Unable to read results" );
+
+    char *expected = inBuffer;
+    char *actual = outBuffer;
+    // Compare only the bytes that both the input and the output vector hold.
+    size_t compSize = typeSize*vecSize;
+    if(outTypeSize*outVecSize < compSize) {
+        compSize = outTypeSize*outVecSize;
+    }
+
+    if(outVecSize == 4 && vecSize == 3)
+    {
+        // as_type4(vec3) should compile but produce undefined results??
+        return 0;
+    }
+
+    if(outVecSize != 3 && vecSize != 3 && outVecSize != vecSize)
+    {
+        // as_typen(vecm) should compile and run but produce
+        // implementation-defined results for m != n
+        // and n*sizeof(type) = sizeof(vecm)
+        return 0;
+    }
+
+    for( int i = 0; i < numElements; i++ )
+    {
+        if( memcmp( expected, actual, compSize ) != 0 )
+        {
+            char expectedString[ 1024 ], actualString[ 1024 ];
+            // Report the destination with its own width and element size:
+            // formatting it with the source layout would name the wrong
+            // conversion and, for 4 -> 3, read past the end of outBuffer.
+            log_error( "ERROR: Data sample %d of %d for as_%s%d( %s%d ) did not validate (expected {%s}, got {%s})\n",
+                      (int)i, (int)numElements, get_explicit_type_name( outVecType ), outVecSize, get_explicit_type_name( inVecType ), vecSize,
+                      GetDataVectorString( expected, typeSize, vecSize, expectedString ),
+                      GetDataVectorString( actual, outTypeSize, outVecSize, actualString ) );
+            log_error("Src is :\n%s\n----\n%d threads %d localthreads\n",
+                      programSrc, (int)threads[0],(int) localThreads[0]);
+            return 1;
+        }
+        // Input and output are packed with their own strides.
+        expected += typeSize * vecSize;
+        actual += outTypeSize * outVecSize;
+    }
+
+    return 0;
+}
+
+// Drives test_astype_set over every ordered pair of distinct element types
+// and every pair of vector widths whose total byte sizes match, plus the
+// 3 <-> 4 element cases for equally sized element types.
+//
+// Types the device cannot use (double without cl_khr_fp64, long/ulong when
+// gHasLong is false) are skipped. Returns the sum of the test_astype_set
+// results, so 0 means every combination passed.
+int test_astype(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
+{
+    // Casting between element sizes that merely add up to the same total
+    // (e.g. short2 -> char4) is legal in OpenCL 1.0, but the result depends
+    // on the device, so there is nothing portable to validate for it. Only
+    // conversions whose outcome is independent of the element count
+    // (char -> uchar, etc.) can really be checked.
+    ExplicitType vecTypes[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes };
+    unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 };
+    int failures = 0;
+
+    for( unsigned int srcIdx = 0; vecTypes[ srcIdx ] != kNumExplicitTypes; ++srcIdx )
+    {
+        const ExplicitType srcType = vecTypes[ srcIdx ];
+        const size_t srcElemSize = get_explicit_type_size( srcType );
+
+        if( srcType == kDouble && !is_extension_available( device, "cl_khr_fp64" ) )
+            continue;
+
+        if( ( srcType == kLong || srcType == kULong ) && !gHasLong )
+            continue;
+
+        for( unsigned int dstIdx = 0; vecTypes[ dstIdx ] != kNumExplicitTypes; ++dstIdx )
+        {
+            const ExplicitType dstType = vecTypes[ dstIdx ];
+            const size_t dstElemSize = get_explicit_type_size( dstType );
+
+            if( dstType == kDouble && !is_extension_available( device, "cl_khr_fp64" ) )
+                continue;
+
+            if( ( dstType == kLong || dstType == kULong ) && !gHasLong )
+                continue;
+
+            // A type is never converted to itself.
+            if( srcIdx == dstIdx )
+                continue;
+
+            log_info( " (%s->%s)\n", get_explicit_type_name( srcType ), get_explicit_type_name( dstType ) );
+            fflush( stdout );
+
+            // Both width lists are terminated by a 0 entry.
+            for( const unsigned int *srcWidth = vecSizes; *srcWidth != 0; ++srcWidth )
+            {
+                for( const unsigned int *dstWidth = vecSizes; *dstWidth != 0; ++dstWidth )
+                {
+                    // Only vectors with the same total byte size are paired.
+                    if( (*srcWidth) * srcElemSize == (*dstWidth) * dstElemSize )
+                    {
+                        failures += test_astype_set( device, context, queue, srcType, dstType, *srcWidth, *dstWidth, n_elems );
+                    }
+                }
+            }
+
+            if( get_explicit_type_size( srcType ) == get_explicit_type_size( dstType ) )
+            {
+                // as_type3(vec4) allowed, as_type4(vec3) not allowed
+                // (test_astype_set runs the 3 -> 4 kernel but skips its
+                // verification).
+                failures += test_astype_set( device, context, queue, srcType, dstType, 3, 4, n_elems );
+                failures += test_astype_set( device, context, queue, srcType, dstType, 4, 3, n_elems );
+            }
+        }
+    }
+
+    return failures;
+}
+
+
--- a/test_conformance/compatibility/test_conformance/basic/test_async_copy.cpp
+++ b/test_conformance/compatibility/test_conformance/basic/test_async_copy.cpp
@@ -0,0 +1,276 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../../test_common/harness/compat.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+
+
+#include "procs.h"
+#include "../../test_common/harness/conversions.h"
+
+// Kernel template for the global -> local async_work_group_copy test.
+// Each work-item zeroes its copiesPerWorkItem slots of localBuffer; after a
+// barrier the work-group issues one async copy of copiesPerWorkgroup elements
+// starting at src + copiesPerWorkgroup*get_group_id(0), waits for it, and
+// each work-item then writes its slots of localBuffer out to dst.
+// sprintf arguments used by test_copy, in order: the optional pragma line,
+// the vector type name four times (src, dst, localBuffer, outer zero cast),
+// the scalar type name (inner zero cast), then the vector type name twice
+// (the two pointer casts).
+static const char *async_global_to_local_kernel =
+"%s\n" // optional pragma string
+"__kernel void test_fn( const __global %s *src, __global %s *dst, __local %s *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem )\n"
+"{\n"
+" int i;\n"
+// Zero the local storage first
+" for(i=0; i<copiesPerWorkItem; i++)\n"
+"     localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ] = (%s)(%s)0;\n"
+// Do this to verify all kernels are done zeroing the local buffer before we try the copy
+"    barrier( CLK_LOCAL_MEM_FENCE );\n"
+"    event_t event;\n"
+"    event = async_work_group_copy( (__local %s*)localBuffer, (__global const %s*)(src+copiesPerWorkgroup*get_group_id(0)), (size_t)copiesPerWorkgroup, 0 );\n"
+// Wait for the copy to complete, then verify by manually copying to the dest
+"    wait_group_events( 1, &event );\n"
+" for(i=0; i<copiesPerWorkItem; i++)\n"
+"  dst[ get_global_id( 0 )*copiesPerWorkItem+i ] = localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ];\n"
+"}\n" ;
+
+// Kernel template for the local -> global async_work_group_copy test.
+// Each work-item zeroes its copiesPerWorkItem slots of localBuffer and then
+// fills them from src, with a barrier after each step. The work-group then
+// issues one async copy of copiesPerWorkgroup elements from localBuffer to
+// dst + copiesPerWorkgroup*get_group_id(0) and waits for it.
+// It takes the same sprintf argument list as async_global_to_local_kernel.
+static const char *async_local_to_global_kernel =
+"%s\n" // optional pragma string
+"__kernel void test_fn( const __global %s *src, __global %s *dst, __local %s *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem )\n"
+"{\n"
+" int i;\n"
+// Zero the local storage first
+" for(i=0; i<copiesPerWorkItem; i++)\n"
+"  localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ] = (%s)(%s)0;\n"
+// Do this to verify all kernels are done zeroing the local buffer before we try the copy
+"    barrier( CLK_LOCAL_MEM_FENCE );\n"
+" for(i=0; i<copiesPerWorkItem; i++)\n"
+"  localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ] = src[ get_global_id( 0 )*copiesPerWorkItem+i ];\n"
+// Do this to verify all kernels are done copying to the local buffer before we try the copy
+"    barrier( CLK_LOCAL_MEM_FENCE );\n"
+"    event_t event;\n"
+"    event = async_work_group_copy((__global %s*)(dst+copiesPerWorkgroup*get_group_id(0)), (__local const %s*)localBuffer, (size_t)copiesPerWorkgroup, 0 );\n"
+"    wait_group_events( 1, &event );\n"
+"}\n" ;
+
+
+// Kernel template for the prefetch test: each work-item prefetches its
+// copiesPerWorkItem source elements and then copies them from src to dst.
+// localBuffer and copiesPerWorkgroup are declared but not used by the kernel.
+// The "Ignore this" comment line inside the kernel consumes three sprintf
+// arguments, so this template accepts the same argument list that test_copy
+// passes to the async copy templates.
+static const char *prefetch_kernel =
+"%s\n" // optional pragma string
+"__kernel void test_fn( const __global %s *src, __global %s *dst, __local %s *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem )\n"
+"{\n"
+" // Ignore this: %s%s%s\n"
+" int i;\n"
+" prefetch( (const __global %s*)(src+copiesPerWorkItem*get_global_id(0)), copiesPerWorkItem);\n"
+" for(i=0; i<copiesPerWorkItem; i++)\n"
+"  dst[ get_global_id( 0 )*copiesPerWorkItem+i ] = src[ get_global_id( 0 )*copiesPerWorkItem+i ];\n"
+"}\n" ;
+
+
+
+namespace {
+// Owns one malloc'd host allocation and frees it when it goes out of scope,
+// so neither the early returns hidden inside test_error() nor the
+// verification-failure return can leak it.
+struct ScopedHostAllocation
+{
+    void *ptr;
+    explicit ScopedHostAllocation( size_t size ) : ptr( malloc( size ) ) {}
+    ~ScopedHostAllocation() { free( ptr ); }
+private:
+    // Not copyable: two owners would free the same pointer twice.
+    ScopedHostAllocation( const ScopedHostAllocation & );
+    ScopedHostAllocation &operator=( const ScopedHostAllocation & );
+};
+}
+
+// Builds kernelCode for the given element type and vector width, runs it over
+// 1111 work-groups of random data and checks that dst ends up byte-identical
+// to src.
+//
+// kernelCode is a sprintf template that receives the optional fp64 pragma,
+// the vector type name four times, the scalar type name, and the vector type
+// name twice more.
+//
+// The work-group size is chosen so that 13 elements per work-item fit in half
+// of the device's local memory, capped by the kernel and device limits.
+//
+// Returns 0 on success, -1 on a data mismatch or host allocation failure, or
+// the OpenCL error code of the call that failed.
+int test_copy(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *kernelCode,
+              ExplicitType vecType, int vecSize
+              )
+{
+    int error;
+    clProgramWrapper program;
+    clKernelWrapper kernel;
+    clMemWrapper streams[ 2 ];
+    size_t threads[ 1 ], localThreads[ 1 ];
+    MTdata d;
+    char vecNameString[64]; vecNameString[0] = 0;
+    if (vecSize == 1)
+        sprintf(vecNameString, "%s", get_explicit_type_name(vecType));
+    else
+        sprintf(vecNameString, "%s%d", get_explicit_type_name(vecType), vecSize);
+
+
+    size_t elementSize = get_explicit_type_size(vecType)*vecSize;
+    log_info("Testing %s\n", vecNameString);
+
+    cl_long max_local_mem_size;
+    error = clGetDeviceInfo(deviceID, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(max_local_mem_size), &max_local_mem_size, NULL);
+    test_error( error, "clGetDeviceInfo for CL_DEVICE_LOCAL_MEM_SIZE failed.");
+
+    // The value is not used below; only the success of the query is checked.
+    unsigned int num_of_compute_devices;
+    error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(num_of_compute_devices), &num_of_compute_devices, NULL);
+    test_error( error, "clGetDeviceInfo for CL_DEVICE_MAX_COMPUTE_UNITS failed.");
+
+    char programSource[4096]; programSource[0]=0;
+    char *programPtr;
+
+    sprintf(programSource, kernelCode,
+            vecType == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
+            vecNameString, vecNameString, vecNameString, vecNameString, get_explicit_type_name(vecType), vecNameString, vecNameString);
+    //log_info("program: %s\n", programSource);
+    programPtr = programSource;
+
+    error = create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&programPtr, "test_fn" );
+    test_error( error, "Unable to create testing kernel" );
+
+    size_t max_workgroup_size;
+    error = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof(max_workgroup_size), &max_workgroup_size, NULL);
+    test_error (error, "clGetKernelWorkGroupInfo failed for CL_KERNEL_WORK_GROUP_SIZE.");
+
+    size_t max_local_workgroup_size[3];
+    error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_local_workgroup_size), max_local_workgroup_size, NULL);
+    test_error (error, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");
+
+    // Pick the minimum of the device and the kernel
+    if (max_workgroup_size > max_local_workgroup_size[0])
+        max_workgroup_size = max_local_workgroup_size[0];
+
+    size_t numberOfCopiesPerWorkitem = 13;
+    size_t localStorageSpacePerWorkitem = numberOfCopiesPerWorkitem*elementSize;
+    // Use at most half of the local memory. The size is converted to size_t
+    // rather than int so a large reported size is not truncated.
+    size_t maxLocalWorkgroupSize = (((size_t)max_local_mem_size/2)/localStorageSpacePerWorkitem);
+
+    // Calculation can return 0 on embedded devices due to 1KB local mem limit
+    if(maxLocalWorkgroupSize == 0)
+    {
+        maxLocalWorkgroupSize = 1;
+    }
+
+    size_t localWorkgroupSize = maxLocalWorkgroupSize;
+    if (maxLocalWorkgroupSize > max_workgroup_size)
+        localWorkgroupSize = max_workgroup_size;
+
+    size_t localBufferSize = localWorkgroupSize*elementSize*numberOfCopiesPerWorkitem;
+    size_t numberOfLocalWorkgroups = 1111;
+    size_t globalBufferSize = numberOfLocalWorkgroups*localBufferSize;
+    size_t globalWorkgroupSize = numberOfLocalWorkgroups*localWorkgroupSize;
+
+    // Host copies of the source data and of the results; released
+    // automatically on every return path.
+    ScopedHostAllocation inStorage( globalBufferSize );
+    ScopedHostAllocation outStorage( globalBufferSize );
+    void *inBuffer = inStorage.ptr;
+    void *outBuffer = outStorage.ptr;
+    if( inBuffer == NULL || outBuffer == NULL )
+    {
+        log_error( "ERROR: Unable to allocate host buffers of %d bytes\n", (int)globalBufferSize );
+        return -1;
+    }
+    memset(outBuffer, 0, globalBufferSize);
+
+    cl_int copiesPerWorkItemInt, copiesPerWorkgroup;
+    copiesPerWorkItemInt = (int)numberOfCopiesPerWorkitem;
+    copiesPerWorkgroup = (int)(numberOfCopiesPerWorkitem*localWorkgroupSize);
+
+    log_info("Global: %d, local %d, local buffer %db, global buffer %db, each work group will copy %d elements and each work item item will copy %d elements.\n",
+             (int) globalWorkgroupSize, (int)localWorkgroupSize, (int)localBufferSize, (int)globalBufferSize, copiesPerWorkgroup, copiesPerWorkItemInt);
+
+    threads[0] = globalWorkgroupSize;
+    localThreads[0] = localWorkgroupSize;
+
+    d = init_genrand( gRandomSeed );
+    generate_random_data( vecType, globalBufferSize/get_explicit_type_size(vecType), d, inBuffer );
+    free_mtdata(d); d = NULL;
+
+    streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, globalBufferSize, inBuffer, &error );
+    test_error( error, "Unable to create input buffer" );
+    streams[ 1 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, globalBufferSize, outBuffer, &error );
+    test_error( error, "Unable to create output buffer" );
+
+    error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] );
+    test_error( error, "Unable to set kernel argument" );
+    error = clSetKernelArg( kernel, 1, sizeof( streams[ 1 ] ), &streams[ 1 ] );
+    test_error( error, "Unable to set kernel argument" );
+    // Argument 2 is the __local scratch buffer: size only, no host pointer.
+    error = clSetKernelArg( kernel, 2, localBufferSize, NULL );
+    test_error( error, "Unable to set kernel argument" );
+    error = clSetKernelArg( kernel, 3, sizeof(copiesPerWorkgroup), &copiesPerWorkgroup );
+    test_error( error, "Unable to set kernel argument" );
+    error = clSetKernelArg( kernel, 4, sizeof(copiesPerWorkItemInt), &copiesPerWorkItemInt );
+    test_error( error, "Unable to set kernel argument" );
+
+    // Enqueue
+    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
+    test_error( error, "Unable to queue kernel" );
+
+    // Read
+    error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, globalBufferSize, outBuffer, 0, NULL, NULL );
+    test_error( error, "Unable to read results" );
+
+    // Verify
+    if( memcmp( inBuffer, outBuffer, globalBufferSize ) != 0 )
+    {
+        log_error( "ERROR: Results of copy did not validate!\n" );
+        unsigned char * inchar = (unsigned char*)inBuffer;
+        unsigned char * outchar = (unsigned char*)outBuffer;
+        int failuresPrinted = 0;
+        // Walk the buffers one element at a time and dump the first few
+        // elements that differ.
+        for (int i=0; i< (int)globalBufferSize; i+=(int)elementSize) {
+            int failed = 0;
+            for (int j=0; j<(int)elementSize; j++)
+                if (inchar[i+j] != outchar[i+j])
+                    failed = 1;
+            char values[4096];
+            values[0] = 0;
+            if (failed) {
+                sprintf(values + strlen( values), "%d -> [", i);
+                for (int j=0; j<(int)elementSize; j++)
+                    sprintf(values + strlen( values), "%2x ", inchar[i+j]);
+                sprintf(values + strlen(values), "] != [");
+                for (int j=0; j<(int)elementSize; j++)
+                    sprintf(values + strlen( values), "%2x ", outchar[i+j]);
+                sprintf(values + strlen(values), "]");
+                log_error("%s\n", values);
+                failuresPrinted++;
+            }
+            if (failuresPrinted > 5) {
+                log_error("Not printing further failures...\n");
+                break;
+            }
+        }
+        return -1;
+    }
+
+    return 0;
+}
+
+// Runs test_copy with kernelCode for every supported element type at vector
+// widths 1, 2, 4, 8 and 16. double is skipped without cl_khr_fp64, and
+// long/ulong are skipped when gHasLong is false.
+//
+// Returns 0 when every combination passes and -1 if any of them fails.
+int test_copy_all_types(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *kernelCode) {
+    // Both tables end with a sentinel entry (kNumExplicitTypes / 0).
+    ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes };
+    unsigned int vecSizes[] = { 1, 2, 4, 8, 16, 0 };
+    int failureCount = 0;
+
+    for( const ExplicitType *type = vecType; *type != kNumExplicitTypes; ++type )
+    {
+        if( *type == kDouble && !is_extension_available( deviceID, "cl_khr_fp64" ) )
+            continue;
+
+        const bool isLongType = ( *type == kLong || *type == kULong );
+        if( isLongType && !gHasLong )
+            continue;
+
+        for( const unsigned int *width = vecSizes; *width != 0; ++width )
+        {
+            // Keep going after a failure so every combination is reported.
+            if( test_copy( deviceID, context, queue, kernelCode, *type, *width ) != 0 )
+                ++failureCount;
+        }
+    }
+
+    return ( failureCount != 0 ) ? -1 : 0;
+}
+
+
+
+
+// Test entry point: runs test_copy_all_types with the global -> local
+// async_work_group_copy kernel template. num_elements is not used.
+// Returns the result of test_copy_all_types.
+int test_async_copy_global_to_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    return test_copy_all_types( deviceID, context, queue, async_global_to_local_kernel );
+}
+
+// Test entry point: runs test_copy_all_types with the local -> global
+// async_work_group_copy kernel template. num_elements is not used.
+// Returns the result of test_copy_all_types.
+int test_async_copy_local_to_global(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    return test_copy_all_types( deviceID, context, queue, async_local_to_global_kernel );
+}
+
+// Test entry point: runs test_copy_all_types with the prefetch kernel
+// template. num_elements is not used.
+// Returns the result of test_copy_all_types.
+int test_prefetch(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    return test_copy_all_types( deviceID, context, queue, prefetch_kernel );
+}
+
--- a/test_conformance/compatibility/test_conformance/basic/test_async_strided_copy.cpp
+++ b/test_conformance/compatibility/test_conformance/basic/test_async_strided_copy.cpp
@@ -0,0 +1,267 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../../test_common/harness/compat.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+
+
+#include "procs.h"
+#include "../../test_common/harness/conversions.h"
+
+static const char *async_strided_global_to_local_kernel = // sprintf template; test_strided_copy() fills its nine %s slots in order
+"%s\n" // optional pragma string (the cl_khr_fp64 enable pragma for double, otherwise empty)
+"%s__kernel void test_fn( const __global %s *src, __global %s *dst, __local %s *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem, int stride )\n" // leading %s receives "" from test_strided_copy(); the other three receive the element type name
+"{\n"
+" int i;\n"
+// Each work-item zeroes its own copiesPerWorkItem slots of the local buffer first
+" for(i=0; i<copiesPerWorkItem; i++)\n"
+"   localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ] = (%s)(%s)0;\n" // casts: element (vector) type name, then scalar type name
+// Make sure every work-item in the group has finished zeroing the local buffer before the copy starts
+" barrier( CLK_LOCAL_MEM_FENCE );\n"
+" event_t event;\n"
+" event = async_work_group_strided_copy( (__local %s*)localBuffer, (__global const %s*)(src+copiesPerWorkgroup*stride*get_group_id(0)), (size_t)copiesPerWorkgroup, (size_t)stride, 0 );\n" // each group starts at its own block of src
+// Wait for the copy to complete, then expose the result by manually writing each local element to dst at the same strided position
+" wait_group_events( 1, &event );\n"
+" for(i=0; i<copiesPerWorkItem; i++)\n"
+"   dst[ get_global_id( 0 )*copiesPerWorkItem*stride+i*stride ] = localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ];\n"
+"}\n" ;
+
+static const char *async_strided_local_to_global_kernel = // sprintf template; test_strided_copy() fills its nine %s slots in order
+"%s\n" // optional pragma string (the cl_khr_fp64 enable pragma for double, otherwise empty)
+"%s__kernel void test_fn( const __global %s *src, __global %s *dst, __local %s *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem, int stride )\n" // leading %s receives "" from test_strided_copy(); the other three receive the element type name
+"{\n"
+" int i;\n"
+// Each work-item zeroes its own copiesPerWorkItem slots of the local buffer first
+" for(i=0; i<copiesPerWorkItem; i++)\n"
+"   localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ] = (%s)(%s)0;\n" // casts: element (vector) type name, then scalar type name
+// Make sure every work-item in the group has finished zeroing the local buffer before it is filled
+" barrier( CLK_LOCAL_MEM_FENCE );\n"
+" for(i=0; i<copiesPerWorkItem; i++)\n"
+"   localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ] = src[ get_global_id( 0 )*copiesPerWorkItem*stride+i*stride ];\n" // manual strided read of src into contiguous local slots
+// Make sure every work-item in the group has finished filling the local buffer before the async copy reads it
+" barrier( CLK_LOCAL_MEM_FENCE );\n"
+" event_t event;\n"
+" event = async_work_group_strided_copy((__global %s*)(dst+copiesPerWorkgroup*stride*get_group_id(0)), (__local const %s*)localBuffer, (size_t)copiesPerWorkgroup, (size_t)stride, 0 );\n" // each group writes its own block of dst
+" wait_group_events( 1, &event );\n"
+"}\n" ;
+
+
+int test_strided_copy(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *kernelCode, ExplicitType vecType, int vecSize, int stride)
+{
+    // Formats kernelCode (a sprintf template with nine %s slots) for vecType/vecSize, runs it on random
+    // input and checks that every stride-th element (stride is in elements) arrived unchanged. Returns 0 on
+    // success, -1 on host allocation failure or mismatch; test_error() returns early on OpenCL errors.
+    // Frees its malloc'd block on every exit path, including the returns inside test_error().
+    struct ScopedHostBuffer {
+        void *ptr;
+        explicit ScopedHostBuffer( size_t bytes ) : ptr( malloc( bytes ) ) {}
+        ~ScopedHostBuffer() { free( ptr ); }
+    };
+
+    int error;
+    clProgramWrapper program;
+    clKernelWrapper kernel;
+    clMemWrapper streams[ 2 ];
+    size_t threads[ 1 ], localThreads[ 1 ];
+    MTdata d;
+    char vecNameString[64]; vecNameString[0] = 0;
+
+    if (vecSize == 1)
+        sprintf(vecNameString, "%s", get_explicit_type_name(vecType));
+    else
+        sprintf(vecNameString, "%s%d", get_explicit_type_name(vecType), vecSize);
+
+    size_t elementSize = get_explicit_type_size(vecType)*vecSize;
+    log_info("Testing %s\n", vecNameString);
+
+    cl_long max_local_mem_size;
+    error = clGetDeviceInfo(deviceID, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(max_local_mem_size), &max_local_mem_size, NULL);
+    test_error( error, "clGetDeviceInfo for CL_DEVICE_LOCAL_MEM_SIZE failed.");
+
+    unsigned int num_of_compute_devices;
+    error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(num_of_compute_devices), &num_of_compute_devices, NULL);
+    test_error( error, "clGetDeviceInfo for CL_DEVICE_MAX_COMPUTE_UNITS failed.");
+
+    // Slot order: pragma, kernel prefix (empty), src/dst/local types, zero-init casts, copy casts.
+    char programSource[4096]; programSource[0]=0;
+    sprintf(programSource, kernelCode,
+        vecType == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
+        "",
+        vecNameString, vecNameString, vecNameString, vecNameString, get_explicit_type_name(vecType), vecNameString, vecNameString);
+    char *programPtr = programSource;
+    error = create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&programPtr, "test_fn" );
+    test_error( error, "Unable to create testing kernel" );
+
+    size_t max_workgroup_size;
+    error = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof(max_workgroup_size), &max_workgroup_size, NULL);
+    test_error (error, "clGetKernelWorkGroupInfo failed for CL_KERNEL_WORK_GROUP_SIZE.");
+
+    size_t max_local_workgroup_size[3];
+    error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_local_workgroup_size), max_local_workgroup_size, NULL);
+    test_error (error, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");
+
+    // Pick the minimum of the device and the kernel
+    if (max_workgroup_size > max_local_workgroup_size[0])
+        max_workgroup_size = max_local_workgroup_size[0];
+
+    cl_ulong max_global_mem_size;
+    error = clGetDeviceInfo(deviceID, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(max_global_mem_size), &max_global_mem_size, NULL);
+    test_error (error, "clGetDeviceInfo failed for CL_DEVICE_GLOBAL_MEM_SIZE");
+
+    cl_bool unified_mem;
+    error = clGetDeviceInfo(deviceID, CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof(unified_mem), &unified_mem, NULL);
+    test_error (error, "clGetDeviceInfo failed for CL_DEVICE_HOST_UNIFIED_MEMORY");
+
+    int number_of_global_mem_buffers = (unified_mem) ? 4 : 2;
+    // Size the work-group so that its element slots use at most half of the reported local memory.
+    size_t numberOfCopiesPerWorkitem = 3;
+    size_t localStorageSpacePerWorkitem = numberOfCopiesPerWorkitem*elementSize;
+    size_t maxLocalWorkgroupSize = (((size_t)max_local_mem_size/2)/localStorageSpacePerWorkitem);
+    size_t localWorkgroupSize = maxLocalWorkgroupSize;
+    if (maxLocalWorkgroupSize > max_workgroup_size)
+        localWorkgroupSize = max_workgroup_size;
+    size_t localBufferSize = localWorkgroupSize*elementSize*numberOfCopiesPerWorkitem;
+    size_t numberOfLocalWorkgroups = 579;//1111;
+
+    // Reduce the numberOfLocalWorkgroups so that no more than 1/2 of CL_DEVICE_GLOBAL_MEM_SIZE is consumed
+    // by the allocated buffer. This is done to avoid resource  errors resulting from address space fragmentation.
+    size_t numberOfLocalWorkgroupsLimit = max_global_mem_size / (2 * number_of_global_mem_buffers * localBufferSize * stride);
+    if (numberOfLocalWorkgroups > numberOfLocalWorkgroupsLimit) numberOfLocalWorkgroups = numberOfLocalWorkgroupsLimit;
+
+    size_t globalBufferSize = numberOfLocalWorkgroups*localBufferSize*stride;
+    size_t globalWorkgroupSize = numberOfLocalWorkgroups*localWorkgroupSize;
+
+    ScopedHostBuffer inHolder( globalBufferSize ), outHolder( globalBufferSize );
+    void *inBuffer = inHolder.ptr, *outBuffer = outHolder.ptr;
+    if (inBuffer == NULL || outBuffer == NULL) {
+        log_error( "ERROR: Unable to allocate %lu bytes of host memory\n", (unsigned long)globalBufferSize );
+        return -1;
+    }
+    memset(outBuffer, 0, globalBufferSize);
+
+    cl_int copiesPerWorkItemInt, copiesPerWorkgroup;
+    copiesPerWorkItemInt = (int)numberOfCopiesPerWorkitem;
+    copiesPerWorkgroup = (int)(numberOfCopiesPerWorkitem*localWorkgroupSize);
+    log_info("Global: %d, local %d, local buffer %db, global buffer %db, copy stride %d, each work group will copy %d elements and each work item item will copy %d elements.\n",
+                (int) globalWorkgroupSize, (int)localWorkgroupSize, (int)localBufferSize, (int)globalBufferSize, (int)stride, copiesPerWorkgroup, copiesPerWorkItemInt);
+    threads[0] = globalWorkgroupSize;
+    localThreads[0] = localWorkgroupSize;
+
+    d = init_genrand( gRandomSeed );
+    generate_random_data( vecType, globalBufferSize/get_explicit_type_size(vecType), d, inBuffer );
+    free_mtdata(d); d = NULL;
+
+    streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, globalBufferSize, inBuffer, &error );
+    test_error( error, "Unable to create input buffer" );
+    streams[ 1 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, globalBufferSize, outBuffer, &error );
+    test_error( error, "Unable to create output buffer" );
+
+    error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] );
+    test_error( error, "Unable to set kernel argument" );
+    error = clSetKernelArg( kernel, 1, sizeof( streams[ 1 ] ), &streams[ 1 ] );
+    test_error( error, "Unable to set kernel argument" );
+    error = clSetKernelArg( kernel, 2, localBufferSize, NULL );
+    test_error( error, "Unable to set kernel argument" );
+    error = clSetKernelArg( kernel, 3, sizeof(copiesPerWorkgroup), &copiesPerWorkgroup );
+    test_error( error, "Unable to set kernel argument" );
+    error = clSetKernelArg( kernel, 4, sizeof(copiesPerWorkItemInt), &copiesPerWorkItemInt );
+    test_error( error, "Unable to set kernel argument" );
+    error = clSetKernelArg( kernel, 5, sizeof(stride), &stride );
+    test_error( error, "Unable to set kernel argument" );
+
+    // Enqueue
+    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
+    test_error( error, "Unable to queue kernel" );
+
+    // Read
+    error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, globalBufferSize, outBuffer, 0, NULL, NULL );
+    test_error( error, "Unable to read results" );
+
+    // Verify: the kernels only write every stride-th element of dst, so only those byte offsets are compared.
+    for (size_t i=0; i<globalBufferSize; i+=elementSize*(size_t)stride)
+    {
+        if (memcmp( ((char *)inBuffer)+i, ((char *)outBuffer)+i, elementSize) != 0 )
+        {
+            // inchar/outchar already point at the failing element, so they are indexed by j alone.
+            const unsigned char * inchar = (const unsigned char*)inBuffer + i;
+            const unsigned char * outchar = (const unsigned char*)outBuffer + i;
+            char values[4096]; values[0] = 0;
+            log_error( "ERROR: Results of copy did not validate!\n" );
+            sprintf(values + strlen( values), "%lu -> [", (unsigned long)i);
+            for (size_t j=0; j<elementSize; j++)
+                sprintf(values + strlen( values), "%2x ", inchar[j]);
+            sprintf(values + strlen(values), "] != [");
+            for (size_t j=0; j<elementSize; j++)
+                sprintf(values + strlen( values), "%2x ", outchar[j]);
+            sprintf(values + strlen(values), "]");
+            log_error("%s\n", values);
+            return -1;
+        }
+    }
+
+    return 0;
+}
+
+int test_strided_copy_all_types(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *kernelCode)
+{
+    // Exercises test_strided_copy() with kernelCode across every supported element type,
+    // vector width and stride. Returns 0 if all combinations pass, -1 otherwise.
+    static const ExplicitType kTypes[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble };
+    static const unsigned int kWidths[] = { 1, 2, 4, 8, 16 };
+    static const unsigned int kStrides[] = { 1, 3, 4, 5 };
+    const size_t typeCount = sizeof( kTypes ) / sizeof( kTypes[ 0 ] );
+    const size_t widthCount = sizeof( kWidths ) / sizeof( kWidths[ 0 ] );
+    const size_t strideCount = sizeof( kStrides ) / sizeof( kStrides[ 0 ] );
+    int failures = 0;
+
+    for( size_t t = 0; t < typeCount; t++ )
+    {
+        const ExplicitType type = kTypes[ t ];
+        // Doubles are skipped without cl_khr_fp64; 64-bit integers are skipped when gHasLong is false.
+        if( type == kDouble && !is_extension_available( deviceID, "cl_khr_fp64" ) )
+            continue;
+        if( ( type == kLong || type == kULong ) && !gHasLong )
+            continue;
+
+        for( size_t w = 0; w < widthCount; w++ )
+        {
+            for( size_t s = 0; s < strideCount; s++ )
+            {
+                if( test_strided_copy( deviceID, context, queue, kernelCode, type, kWidths[ w ], kStrides[ s ] ) != 0 )
+                    failures++;
+            }
+        }
+    }
+    return ( failures != 0 ) ? -1 : 0;
+}
+
+
+
+
+int test_async_strided_copy_global_to_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{   // Runs test_strided_copy_all_types() with the global-to-local kernel template and returns its 0 / -1 result; num_elements is unused.
+    return test_strided_copy_all_types( deviceID, context, queue, async_strided_global_to_local_kernel );
+}
+
+int test_async_strided_copy_local_to_global(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{   // Runs test_strided_copy_all_types() with the local-to-global kernel template and returns its 0 / -1 result; num_elements is unused.
+    return test_strided_copy_all_types( deviceID, context, queue, async_strided_local_to_global_kernel );
+}
+
--- a/test_conformance/compatibility/test_conformance/basic/test_barrier.c
+++ b/test_conformance/compatibility/test_conformance/basic/test_barrier.c
@@ -0,0 +1,158 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../../test_common/harness/compat.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+
+#include "procs.h"
+
+const char *barrier_kernel_code = // OpenCL C source of compute_sum: sums a[0..n) into *sum, using tmp_sum as one scratch slot per work-item
+"__kernel void compute_sum(__global int *a, int n, __global int *tmp_sum, __global int *sum)\n"
+"{\n"
+"    int  tid = get_local_id(0);\n" // only local ids are used; test_barrier() launches a single work-group
+"    int  lsize = get_local_size(0);\n"
+"    int  i;\n"
+"\n"
+"    tmp_sum[tid] = 0;\n"
+"    for (i=tid; i<n; i+=lsize)\n" // phase 1: each work-item adds elements tid, tid+lsize, tid+2*lsize, ...
+"        tmp_sum[tid] += a[i];\n"
+"     \n"
+"     // updated to work for any workgroup size \n"
+"    for (i=hadd(lsize,1); lsize>1; i = hadd(i,1))\n" // phase 2: hadd(x,1) is (x+1)>>1 per the OpenCL C spec, i.e. half of x rounded up
+"    {\n"
+"        barrier(CLK_GLOBAL_MEM_FENCE);\n" // tmp_sum is a __global buffer, hence the global-memory fence
+"        if (tid + i < lsize)\n"
+"            tmp_sum[tid] += tmp_sum[tid + i];\n" // fold the upper part of the active range onto the lower part
+"         lsize = i; \n" // the active range shrinks to the lower part for the next pass
+"    }\n"
+"\n"
+"     //no barrier is required here because last person to write to tmp_sum[0] was tid 0 \n"
+"    if (tid == 0)\n"
+"        *sum = tmp_sum[0];\n"
+"}\n";
+
+
+static int
+verify_sum(int *inptr, int *tmpptr, int *outptr, int n)
+{
+    // Checks outptr[0] against the host-side sum of inptr[0..n); tmpptr is not used.
+    // Returns 0 on a match and -1 otherwise. The sum is accumulated modulo 2^32 in an
+    // unsigned value: adding the signed inputs directly is undefined behaviour as soon
+    // as the running total leaves the range of int, which large n can cause.
+    unsigned int expected = 0;
+    int i;
+    for (i = 0; i < n; i++)
+        expected += (unsigned int)inptr[i];
+
+    if (expected != (unsigned int)outptr[0])
+    {
+        log_error("BARRIER test failed\n");
+        return -1;
+    }
+    log_info("BARRIER test passed\n");
+    return 0;
+}
+
+
+int
+test_barrier(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+{
+    // Sums num_elements random ints with the compute_sum kernel, launched as one work-group that
+    // synchronises through barrier(), and compares the result with a host-side sum.
+    // Returns 0 on success, -1 on a mismatch or host allocation failure, otherwise the error code
+    // of the first failing OpenCL call. All resources are released on every path via "cleanup".
+    cl_mem            streams[3] = { NULL, NULL, NULL };
+    cl_int            *input_ptr = NULL, *output_ptr = NULL;
+    cl_program        program = NULL;
+    cl_kernel        kernel = NULL;
+    size_t    global_threads[3];
+    size_t    local_threads[3];
+    int                err;
+    int                i;
+    size_t max_local_workgroup_size[3];
+    size_t max_threadgroup_size = 0;
+    MTdata d;
+
+    err = create_single_kernel_helper(context, &program, &kernel, 1, &barrier_kernel_code, "compute_sum" );
+    if (err) { log_error("ERROR: Failed to build kernel/program. (%d)\n", err); goto cleanup; }
+
+    err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(max_threadgroup_size), &max_threadgroup_size, NULL);
+    if (err) { log_error("ERROR: clGetKernelWorkgroupInfo failed. (%d)\n", err); goto cleanup; }
+
+    err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_local_workgroup_size), max_local_workgroup_size, NULL);
+    if (err) { log_error("ERROR: clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES (%d)\n", err); goto cleanup; }
+
+    // Pick the minimum of the device and the kernel
+    if (max_threadgroup_size > max_local_workgroup_size[0])
+        max_threadgroup_size = max_local_workgroup_size[0];
+
+    // work group size must divide evenly into the global size
+    while( num_elements % max_threadgroup_size )
+        max_threadgroup_size--;
+
+    input_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
+    output_ptr = (cl_int*)malloc(sizeof(cl_int));
+    if (input_ptr == NULL || output_ptr == NULL) { log_error("ERROR: Unable to allocate host memory\n"); err = -1; goto cleanup; }
+
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, &err);
+    if (err) { log_error("ERROR: clCreateBuffer failed. (%d)\n", err); goto cleanup; }
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int), NULL, &err);
+    if (err) { log_error("ERROR: clCreateBuffer failed. (%d)\n", err); goto cleanup; }
+    streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * max_threadgroup_size, NULL, &err);
+    if (err) { log_error("ERROR: clCreateBuffer failed. (%d)\n", err); goto cleanup; }
+
+    d = init_genrand( gRandomSeed );
+    for (i=0; i<num_elements; i++)
+        input_ptr[i] = (int)get_random_float(-0x01000000, 0x01000000, d);
+    free_mtdata(d);  d = NULL;
+
+    err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, sizeof(cl_int)*num_elements, (void *)input_ptr, 0, NULL, NULL);
+    if (err) { log_error("ERROR: clEnqueueWriteBuffer failed. (%d)\n", err); goto cleanup; }
+
+    err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
+    err |= clSetKernelArg(kernel, 1, sizeof num_elements, &num_elements);
+    err |= clSetKernelArg(kernel, 2, sizeof streams[2], &streams[2]);
+    err |= clSetKernelArg(kernel, 3, sizeof streams[1], &streams[1]);
+    if (err) { log_error("ERROR: clSetKernelArg failed. (%d)\n", err); goto cleanup; }
+
+    global_threads[0] = max_threadgroup_size;
+    local_threads[0] = max_threadgroup_size;
+
+    err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_threads, local_threads, 0, NULL, NULL );
+    if (err) { log_error("ERROR: clEnqueueNDRangeKernel failed. (%d)\n", err); goto cleanup; }
+
+    err = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int), (void *)output_ptr, 0, NULL, NULL );
+    if (err) { log_error("ERROR: clEnqueueReadBuffer failed. (%d)\n", err); goto cleanup; }
+
+    err = verify_sum(input_ptr, NULL, output_ptr, num_elements);
+
+cleanup:
+    for (i = 0; i < 3; i++) if (streams[i]) clReleaseMemObject(streams[i]);
+    if (kernel) clReleaseKernel(kernel);
+    if (program) clReleaseProgram(program);
+    free(input_ptr);
+    free(output_ptr);
+    return err;
+}
+
+
+
+
+
--- a/test_conformance/compatibility/test_conformance/basic/test_basic_parameter_types.c
+++ b/test_conformance/compatibility/test_conformance/basic/test_basic_parameter_types.c
@@ -0,0 +1,302 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../../test_common/harness/compat.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "procs.h"
+
+const char *kernel_code = // sprintf template used by test_basic_parameter_types(): 8 vector-width suffixes, then 6 conversion prefixes
+"__kernel void test_kernel(\n"
+"char%s c, uchar%s uc, short%s s, ushort%s us, int%s i, uint%s ui, float%s f,\n" // each %s is "", "2", "4", "8" or "16"
+"__global float%s *result)\n" // result[k] is one float / floatN value per by-value argument
+"{\n"
+"  result[0] = %s(c);\n" // %s is "convert_floatN" for vector widths, or " " for scalars (implicit conversion)
+"  result[1] = %s(uc);\n"
+"  result[2] = %s(s);\n"
+"  result[3] = %s(us);\n"
+"  result[4] = %s(i);\n"
+"  result[5] = %s(ui);\n"
+"  result[6] = f;\n" // already a float type, stored without conversion
+"}\n";
+
+const char *kernel_code_long = // sprintf template used by test_basic_parameter_types_long(): 3 vector-width suffixes, then 2 conversion prefixes
+"__kernel void test_kernel_long(\n"
+"long%s l, ulong%s ul,\n" // each %s is "", "2", "4", "8" or "16"
+"__global float%s *result)\n"
+"{\n"
+"  result[0] = %s(l);\n" // %s is "convert_floatN" for vector widths, or " " for scalars (implicit conversion)
+"  result[1] = %s(ul);\n"
+"}\n";
+
+int  // returns the number of mismatching results (0 means pass); a failing OpenCL call returns early through test_error()
+test_basic_parameter_types_long(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+{   // Passes long/ulong scalars and vectors by value to test_kernel_long and checks the floats it writes back; num_elements is unused.
+     clMemWrapper results;
+  int error;
+  size_t global[3] = {1, 1, 1};
+  float results_back[2*16];  // 2 arguments x up to 16 vector components
+  int count, index;
+  const char* types[] = { "long", "ulong" };
+  char kernel_string[8192];
+  int sizes[] = {1, 2, 4, 8, 16};
+  const char* size_strings[] = {"", "2", "4", "8", "16"};  // type-name suffix matching sizes[] entry for entry
+  float expected;
+  int total_errors = 0;
+  int size_to_test;
+  char *ptr;
+  char convert_string[1024];
+  size_t max_parameter_size;
+
+  // We don't really care about the contents since we're just testing that the types work.
+  cl_long l[16]={-21,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15};
+  cl_ulong ul[16]={22,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+
+  // Calculate how large our parameter size is to the kernel (bytes per vector component, summed over both arguments)
+  size_t parameter_size = sizeof(cl_long) + sizeof(cl_ulong);
+
+  // Init our strings.
+  kernel_string[0] = '\0';
+  convert_string[0] = '\0';
+
+  // Get the maximum parameter size allowed
+  error = clGetDeviceInfo( device, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof( max_parameter_size ), &max_parameter_size, NULL );
+    test_error( error, "Unable to get max parameter size from device" );
+
+  // Create the results buffer
+  results = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float)*2*16, NULL, &error);
+  test_error(error, "clCreateBuffer failed");
+
+  // Go over all the vector sizes
+  for (size_to_test = 0; size_to_test < 5; size_to_test++) {  // 5 == number of entries in sizes[]
+    clProgramWrapper program;
+    clKernelWrapper kernel;
+
+    size_t total_parameter_size = parameter_size*sizes[size_to_test] + sizeof(cl_mem);  // both by-value arguments plus the result buffer handle
+    if (total_parameter_size > max_parameter_size) {
+      log_info("Can not test with vector size %d because it would exceed the maximum allowed parameter size to the kernel. (%d > %d)\n",
+               (int)sizes[size_to_test], (int)total_parameter_size, (int)max_parameter_size);
+      continue;
+    }
+
+    log_info("Testing vector size %d\n", sizes[size_to_test]);
+
+    // If size is > 1, then we need a explicit convert call.
+    if (sizes[size_to_test] > 1) {
+      sprintf(convert_string, "convert_float%s",  size_strings[size_to_test]);
+    } else {
+      sprintf(convert_string, " ");
+    }
+
+    // Build the kernel: three width suffixes followed by two conversion prefixes, in template order
+    sprintf(kernel_string, kernel_code_long,
+            size_strings[size_to_test], size_strings[size_to_test], size_strings[size_to_test],
+            convert_string, convert_string
+    );
+
+    ptr = kernel_string;
+    error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&ptr, "test_kernel_long");
+    test_error(error, "create single kernel failed");
+
+    // Set the arguments: the first sizes[size_to_test] elements of each host array form the by-value argument
+    for (count = 0; count < 2; count++) {
+      switch (count) {
+        case 0: error = clSetKernelArg(kernel, count, sizeof(cl_long)*sizes[size_to_test], &l); break;
+        case 1: error = clSetKernelArg(kernel, count, sizeof(cl_ulong)*sizes[size_to_test], &ul); break;
+        default: log_error("Test error"); break;
+      }
+      if (error)
+        log_error("Setting kernel arg %d %s%s: ", count, types[count], size_strings[size_to_test]);
+      test_error(error, "clSetKernelArgs failed");
+    }
+    error = clSetKernelArg(kernel, 2, sizeof(cl_mem), &results);
+    test_error(error, "clSetKernelArgs failed");
+
+    // Execute (a single work-item; global is {1, 1, 1})
+    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global, NULL, 0, NULL, NULL);
+    test_error(error, "clEnqueueNDRangeKernel failed");
+
+    // Read back the results
+    error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0, sizeof(cl_float)*2*16, results_back, 0, NULL, NULL);
+    test_error(error, "clEnqueueReadBuffer failed");
+
+    // Verify the results
+    for (count = 0; count < 2; count++) {
+      for (index=0; index < sizes[size_to_test]; index++) {
+        switch (count) {
+          case 0: expected = (float)l[index]; break;
+          case 1: expected = (float)ul[index]; break;
+          default: log_error("Test error"); break;
+        }
+        // result[count] is a floatN, so component `index` sits at flat float offset count*N + index
+        if (results_back[count*sizes[size_to_test]+index] != expected) {
+          total_errors++;
+          log_error("Conversion from %s%s failed: index %d got %g, expected %g.\n", types[count], size_strings[size_to_test],
+                    index, results_back[count*sizes[size_to_test]+index], expected);
+        }
+      }
+    }
+  }
+
+  return total_errors;
+}
+
+int
+test_basic_parameter_types(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+{
+  // Passes char/uchar/short/ushort/int/uint/float scalars and vectors by value to test_kernel and checks the floats
+  // it writes back, then runs the long variant when gHasLong is set. Returns the number of mismatches (0 == pass).
+     clMemWrapper results;
+  int error, total_errors = 0;
+  size_t global[3] = {1, 1, 1};
+  float results_back[7*16];  // 7 arguments x up to 16 vector components
+  int count, index, size_to_test;
+  const char* types[] = {"char", "uchar", "short", "ushort", "int", "uint", "float"};
+  char kernel_string[8192];
+  int sizes[] = {1, 2, 4, 8, 16};
+  const char* size_strings[] = {"", "2", "4", "8", "16"};  // type-name suffix matching sizes[] entry for entry
+  float expected = 0.0f;  // initialised so the unreachable default branch below cannot read garbage
+  char *ptr;
+  char convert_string[1024];
+  size_t max_parameter_size;
+  const int num_sizes = (int)(sizeof(sizes)/sizeof(sizes[0]));
+  // We don't really care about the contents since we're just testing that the types work.
+  cl_char c[16]={0,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15};
+  cl_uchar uc[16]={16,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+  cl_short s[16]={-17,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15};
+  cl_ushort us[16]={18,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+  cl_int i[16]={-19,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15};
+  cl_uint ui[16]={20,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+  cl_float f[16]={-23,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15};
+
+  // Calculate how large our parameter size is to the kernel (bytes per vector component, summed over all arguments)
+  size_t parameter_size = sizeof(cl_char) + sizeof(cl_uchar) +
+  sizeof(cl_short) +sizeof(cl_ushort) +
+  sizeof(cl_int) +sizeof(cl_uint) +
+  sizeof(cl_float);
+
+  // Init our strings.
+  kernel_string[0] = '\0';
+  convert_string[0] = '\0';
+
+  // Get the maximum parameter size allowed
+  error = clGetDeviceInfo( device, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof( max_parameter_size ), &max_parameter_size, NULL );
+    test_error( error, "Unable to get max parameter size from device" );
+
+  // Create the results buffer
+  results = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float)*7*16, NULL, &error);
+  test_error(error, "clCreateBuffer failed");
+
+  // Go over all the vector sizes
+  for (size_to_test = 0; size_to_test < num_sizes; size_to_test++) {
+    clProgramWrapper program;
+    clKernelWrapper kernel;
+
+    size_t total_parameter_size = parameter_size*sizes[size_to_test] + sizeof(cl_mem);  // by-value arguments plus the result buffer handle
+    if (total_parameter_size > max_parameter_size) {
+      log_info("Can not test with vector size %d because it would exceed the maximum allowed parameter size to the kernel. (%d > %d)\n",
+               (int)sizes[size_to_test], (int)total_parameter_size, (int)max_parameter_size);
+      continue;
+    }
+
+    log_info("Testing vector size %d\n", sizes[size_to_test]);
+
+    // If size is > 1, then we need a explicit convert call.
+    if (sizes[size_to_test] > 1) {
+      sprintf(convert_string, "convert_float%s",  size_strings[size_to_test]);
+    } else {
+      sprintf(convert_string, " ");
+    }
+
+    // Build the kernel: eight width suffixes followed by six conversion prefixes, in template order
+    sprintf(kernel_string, kernel_code,
+            size_strings[size_to_test], size_strings[size_to_test], size_strings[size_to_test],
+            size_strings[size_to_test], size_strings[size_to_test], size_strings[size_to_test],
+            size_strings[size_to_test], size_strings[size_to_test],
+            convert_string, convert_string, convert_string,
+            convert_string, convert_string, convert_string
+    );
+
+    ptr = kernel_string;
+    error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&ptr, "test_kernel");
+    test_error(error, "create single kernel failed");
+
+    // Set the arguments: the first sizes[size_to_test] elements of each host array form the by-value argument
+    for (count = 0; count < 7; count++) {
+      switch (count) {
+        case 0: error = clSetKernelArg(kernel, count, sizeof(cl_char)*sizes[size_to_test], &c); break;
+        case 1: error = clSetKernelArg(kernel, count, sizeof(cl_uchar)*sizes[size_to_test], &uc); break;
+        case 2: error = clSetKernelArg(kernel, count, sizeof(cl_short)*sizes[size_to_test], &s); break;
+        case 3: error = clSetKernelArg(kernel, count, sizeof(cl_ushort)*sizes[size_to_test], &us); break;
+        case 4: error = clSetKernelArg(kernel, count, sizeof(cl_int)*sizes[size_to_test], &i); break;
+        case 5: error = clSetKernelArg(kernel, count, sizeof(cl_uint)*sizes[size_to_test], &ui); break;
+        case 6: error = clSetKernelArg(kernel, count, sizeof(cl_float)*sizes[size_to_test], &f); break;
+        default: log_error("Test error"); break;
+      }
+      if (error)
+        log_error("Setting kernel arg %d %s%s: ", count, types[count], size_strings[size_to_test]);
+      test_error(error, "clSetKernelArgs failed");
+    }
+    error = clSetKernelArg(kernel, 7, sizeof(cl_mem), &results);
+    test_error(error, "clSetKernelArgs failed");
+
+    // Execute (a single work-item; global is {1, 1, 1})
+    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global, NULL, 0, NULL, NULL);
+    test_error(error, "clEnqueueNDRangeKernel failed");
+
+    // Read back the results
+    error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0, sizeof(cl_float)*7*16, results_back, 0, NULL, NULL);
+    test_error(error, "clEnqueueReadBuffer failed");
+
+    // Verify the results
+    for (count = 0; count < 7; count++) {
+      for (index=0; index < sizes[size_to_test]; index++) {
+        switch (count) {
+          case 0: expected = (float)c[index]; break;
+          case 1: expected = (float)uc[index]; break;
+          case 2: expected = (float)s[index]; break;
+          case 3: expected = (float)us[index]; break;
+          case 4: expected = (float)i[index]; break;
+          case 5: expected = (float)ui[index]; break;
+          case 6: expected = (float)f[index]; break;
+          default: log_error("Test error"); break;
+        }
+        // result[count] is a floatN, so component `index` sits at flat float offset count*N + index
+        if (results_back[count*sizes[size_to_test]+index] != expected) {
+          total_errors++;
+          log_error("Conversion from %s%s failed: index %d got %g, expected %g.\n", types[count], size_strings[size_to_test],
+                    index, results_back[count*sizes[size_to_test]+index], expected);
+        }
+      }
+    }
+  }
+
+  if (gHasLong) {
+    log_info("Testing long types...\n");
+    total_errors += test_basic_parameter_types_long( device, context, queue, num_elements );
+  }
+  else {
+    log_info("Longs unsupported, skipping.\n");
+  }
+
+  return total_errors;
+}
+
+
+
--- a/test_conformance/compatibility/test_conformance/basic/test_bufferreadwriterect.c
+++ b/test_conformance/compatibility/test_conformance/basic/test_bufferreadwriterect.c
@@ -0,0 +1,529 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../../test_common/harness/compat.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "procs.h"
+
+#define CL_EXIT_ERROR(cmd,format,...)                \
+{                                \
+if ((cmd) != CL_SUCCESS) {                    \
+log_error("CL ERROR: %s %u: ", __FILE__,__LINE__);    \
+log_error(format,## __VA_ARGS__ );            \
+log_error("\n");                        \
+/*abort();*/                \
+}                                \
+}
+
+// Element type stored in every test buffer.
+typedef unsigned char BufferType;
+
+// Command queue shared by the helper functions below; set at the start of
+// test_bufferreadwriterect.
+cl_command_queue queue;
+
+// Width, height and depth (in elements) of each test buffer.
+enum { TotalImages = 8 };
+size_t width  [TotalImages];
+size_t height [TotalImages];
+size_t depth  [TotalImages];
+
+// Per test buffer: the cl buffer, the host copy that mirrors every operation
+// (verify), and the host memory handed to clCreateBuffer (backing).
+cl_mem buffer [TotalImages];
+BufferType* verify[TotalImages];
+BufferType* backing[TotalImages];
+
+// Temporary host buffer used for read and write operations, and its size in bytes.
+BufferType* tmp_buffer;
+size_t tmp_buffer_size;
+
+size_t num_tries   = 50; // Number of randomly selected operations to perform.
+size_t alloc_scale = 2;   // Divisor applied to the estimated maximum buffer dimension.
+MTdata mt;                // Random number generator state shared by the test helpers.
+
+// Fills the w*h*d elements starting at ptr with pseudo-random bytes drawn
+// from the generator mt.  Each byte is the generator output modulo 0xff.
+static void initialize_image(BufferType* ptr, size_t w, size_t h, size_t d, MTdata mt)
+{
+    enum { ElementSize = sizeof(BufferType)/sizeof(unsigned char) };
+
+    unsigned char* cursor = (unsigned char*)ptr;
+    unsigned char* const end = cursor + w*h*d*ElementSize;
+
+    while (cursor != end) {
+        *cursor = (unsigned char)(genrand_int32(mt) % 0xff);
+        ++cursor;
+    }
+}
+
+// Dumps a w x h x d buffer through log_error as two-digit hex values: one
+// text line per row, one titled section per depth slice.
+void print_buffer(BufferType* buf, size_t w, size_t h, size_t d) {
+    const size_t slice_elems = w*h;
+
+    log_error("Size = %lux%lux%lu (%lu total)\n",w,h,d,w*h*d);
+    for (unsigned slice=0; slice!=d; ++slice) {
+        log_error("Slice: %u\n",slice);
+        for (unsigned row=0; row!=h; ++row) {
+            // First element of the current row within the current slice.
+            const BufferType* line = buf + slice*slice_elems + row*w;
+            for (unsigned col=0; col!=w; ++col) {
+                log_error("%02x",line[col]);
+            }
+            log_error("\n");
+        }
+        log_error("\n");
+    }
+}
+
+// Returns true when two boxes of identical extent `region`, placed at
+// src_offset and dst_offset, intersect.  Two boxes intersect only if their
+// half-open spans [offset, offset+region) intersect on every one of the
+// three axes, so the first disjoint axis settles the answer.
+bool check_overlap(const size_t src_offset[3], const size_t dst_offset[3], const size_t region[3]) {
+
+    for (unsigned axis = 0; axis < 3; ++axis) {
+        const size_t src_begin = src_offset[axis];
+        const size_t src_end   = src_offset[axis] + region[axis];
+        const size_t dst_begin = dst_offset[axis];
+        const size_t dst_end   = dst_offset[axis] + region[axis];
+
+        if (!(src_begin < dst_end) || !(src_end > dst_begin)) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+// Copies a region between two cl buffers with clEnqueueCopyBufferRect and
+// then mirrors the same copy on the host side verify buffers.
+//
+//   src, dst          indices of the source and destination test buffers
+//   soffset, doffset  origin of the region in the source / destination
+//   sregion           extent of the region (used for both sides)
+//   dregion           unused; kept for a signature parallel to the other operations
+//
+// Returns CL_SUCCESS when the copy was performed and mirrored, or when it was
+// skipped because the offsets describe overlapping regions.  Returns the CL
+// error code when the enqueue fails; in that case the verify buffers are left
+// untouched so that host and device state do not diverge.
+int copy_region(size_t src, size_t soffset[3], size_t sregion[3], size_t dst, size_t doffset[3], size_t dregion[3]) {
+
+    // Row pitch is the buffer width; a slice pitch of 0 is passed when a
+    // slice holds a single element.
+    size_t src_slice_pitch = (width[src]*height[src] != 1) ? width[src]*height[src] : 0;
+    size_t dst_slice_pitch = (width[dst]*height[dst] != 1) ? width[dst]*height[dst] : 0;
+
+    if (check_overlap(soffset,doffset,sregion)) {
+        log_info( "Copy overlap reported, skipping copy buffer rect\n" );
+        return CL_SUCCESS;
+    }
+
+    // Copy between cl buffers.
+    cl_int err = clEnqueueCopyBufferRect(queue,
+                                         buffer[src],buffer[dst],
+                                         soffset, doffset,
+                                         sregion,/*dregion,*/
+                                         width[src], src_slice_pitch,
+                                         width[dst], dst_slice_pitch,
+                                         0, NULL, NULL);
+    if (err != CL_SUCCESS) {
+        CL_EXIT_ERROR(err, "clEnqueueCopyBufferRect failed between %u and %u",(unsigned)src,(unsigned)dst);
+        // Do not mirror a copy the device never performed.
+        return err;
+    }
+
+    // Copy between host buffers.
+    size_t total = sregion[0] * sregion[1] * sregion[2];
+
+    size_t spitch = width[src];
+    size_t sslice = width[src]*height[src];
+
+    size_t dpitch = width[dst];
+    size_t dslice = width[dst]*height[dst];
+
+    // Number of elements in one slice of the region.
+    size_t rslice = sregion[0]*sregion[1];
+
+    for (size_t i = 0; i != total; ++i) {
+
+        // Compute the coordinates of the element within the region; they are
+        // the same on the source and the destination side.
+        size_t rz = i / rslice;
+        size_t ry = (i % rslice) / sregion[0];
+        size_t rx = (i % rslice) % sregion[0];
+
+        // Compute the element index within the source and destination buffers.
+        size_t s_idx = (soffset[2]+rz)*sslice + (soffset[1]+ry)*spitch + soffset[0]+rx;
+        size_t d_idx = (doffset[2]+rz)*dslice + (doffset[1]+ry)*dpitch + doffset[0]+rx;
+
+        verify[dst][d_idx] = verify[src][s_idx];
+    }
+
+    return 0;
+}
+
+// Compares a region of the buffer pointed to by `device`, laid out with the
+// dimensions of test buffer `dst` and starting at doffset, against the region
+// of verify[src] that starts at soffset.  Both regions have extent sregion.
+// Returns 0 when every element matches, -1 at the first mismatch (after
+// logging the offending coordinate and values).
+int verify_region(BufferType* device, size_t src, size_t soffset[3], size_t sregion[3], size_t dst, size_t doffset[3]) {
+
+    // Row and slice strides (in elements) of the two layouts.
+    const size_t src_row   = width[src];
+    const size_t src_plane = width[src]*height[src];
+    const size_t dst_row   = width[dst];
+    const size_t dst_plane = width[dst]*height[dst];
+
+    // Linear position within the region, x varying fastest.
+    size_t visited = 0;
+
+    for (size_t z = 0; z != sregion[2]; ++z) {
+        for (size_t y = 0; y != sregion[1]; ++y) {
+            for (size_t x = 0; x != sregion[0]; ++x, ++visited) {
+
+                const size_t s_idx = (soffset[2]+z)*src_plane + (soffset[1]+y)*src_row + soffset[0]+x;
+                const size_t d_idx = (doffset[2]+z)*dst_plane + (doffset[1]+y)*dst_row + doffset[0]+x;
+
+                if (device[d_idx] == verify[src][s_idx]) {
+                    continue;
+                }
+
+                log_error("Verify failed on comparsion %lu: coordinate (%lu, %lu, %lu) of region\n",visited,x,y,z);
+                log_error("0x%02x != 0x%02x\n", device[d_idx], verify[src][s_idx]);
+#if 0
+                // Uncomment this section to print buffers.
+                log_error("Device (copy): [%lu]\n",dst);
+                print_buffer(device,width[dst],height[dst],depth[dst]);
+                log_error("\n");
+                log_error("Verify: [%lu]\n",src);
+                print_buffer(verify[src],width[src],height[src],depth[src]);
+                log_error("\n");
+                abort();
+#endif
+                return -1;
+            }
+        }
+    }
+
+    return 0;
+}
+
+
+// Reads a region of the cl buffer `src` with a blocking
+// clEnqueueReadBufferRect into the temporary host buffer, which is laid out
+// with the dimensions of test buffer `dst` and written at doffset.  The
+// region read back is then compared to the same region of verify[src].
+//
+//   soffset, sregion  origin and extent of the region in the source buffer
+//   doffset           origin of the region inside the temporary buffer
+//   dregion           unused; kept for a signature parallel to the other operations
+//
+// Returns 0 on success, the CL error code if the read fails, or -1 if the
+// data does not match.
+int read_verify_region(size_t src, size_t soffset[3], size_t sregion[3], size_t dst, size_t doffset[3], size_t dregion[3]) {
+
+    // Clear the temporary destination host buffer.
+    memset(tmp_buffer, 0xff, tmp_buffer_size);
+
+    size_t src_slice_pitch = (width[src]*height[src] != 1) ? width[src]*height[src] : 0;
+    size_t dst_slice_pitch = (width[dst]*height[dst] != 1) ? width[dst]*height[dst] : 0;
+
+    // Copy the source region of the cl buffer, to the destination region of the temporary buffer.
+    cl_int err = clEnqueueReadBufferRect(queue,
+                                         buffer[src],
+                                         CL_TRUE,
+                                         soffset,doffset,
+                                         sregion,
+                                         width[src], src_slice_pitch,
+                                         width[dst], dst_slice_pitch,
+                                         tmp_buffer,
+                                         0, NULL, NULL);
+    if (err != CL_SUCCESS) {
+        CL_EXIT_ERROR(err, "clEnqueueReadBufferRect failed between %u and %u",(unsigned)src,(unsigned)dst);
+        // Nothing meaningful was read back, so there is nothing to compare.
+        return err;
+    }
+
+    return verify_region(tmp_buffer,src,soffset,sregion,dst,doffset);
+}
+
+// Performs the same check as read_verify_region, except that the device data
+// is accessed through a blocking clEnqueueMapBuffer instead of a
+// ReadBufferRect, and the whole of buffer `src` is compared to verify[src].
+//
+// Returns 0 on success, the CL error code if mapping or unmapping fails, or
+// -1 if the data does not match.
+int map_verify_region(size_t src) {
+
+    size_t size_bytes = width[src]*height[src]*depth[src]*sizeof(BufferType);
+
+    // Map the whole cl buffer for reading.
+    cl_int err = CL_SUCCESS;
+    BufferType* mapped = (BufferType*)clEnqueueMapBuffer(queue,buffer[src],CL_TRUE,CL_MAP_READ,0,size_bytes,0,NULL,NULL,&err);
+    CL_EXIT_ERROR(err, "clEnqueueMapBuffer failed for buffer %u",(unsigned)src);
+    if (err != CL_SUCCESS || mapped == NULL) {
+        // Without a valid mapping there is nothing to compare; dereferencing
+        // the returned pointer would be invalid.
+        return (err != CL_SUCCESS) ? err : -1;
+    }
+
+    size_t soffset[] = { 0, 0, 0 };
+    size_t sregion[] = { width[src], height[src], depth[src] };
+
+    int ret = verify_region(mapped,src,soffset,sregion,src,soffset);
+
+    err = clEnqueueUnmapMemObject(queue,buffer[src],mapped,0,NULL,NULL);
+    CL_EXIT_ERROR(err, "clEnqueueUnmapMemObject failed for buffer %u",(unsigned)src);
+
+    // A verification failure takes precedence over an unmap failure.
+    if (ret == 0 && err != CL_SUCCESS) {
+        ret = err;
+    }
+
+    return ret;
+}
+
+// Fills the temporary host buffer with fresh random data, writes a region of
+// it into the cl buffer `dst` with a blocking clEnqueueWriteBufferRect, and
+// mirrors the same write on verify[dst].
+//
+//   src               test buffer whose dimensions define the layout of the
+//                     temporary host buffer
+//   soffset, sregion  origin and extent of the region in the temporary buffer
+//   dst               index of the destination test buffer
+//   doffset, dregion  origin and extent of the region in the destination
+//
+// Returns 0 on success or the CL error code if the write fails; on failure
+// verify[dst] is left untouched so that host and device state do not diverge.
+int write_region(size_t src, size_t soffset[3], size_t sregion[3], size_t dst, size_t doffset[3], size_t dregion[3]) {
+
+    initialize_image(tmp_buffer, tmp_buffer_size, 1, 1, mt);
+    // memset(tmp_buffer, 0xf0, tmp_buffer_size);
+
+    size_t src_slice_pitch = (width[src]*height[src] != 1) ? width[src]*height[src] : 0;
+    size_t dst_slice_pitch = (width[dst]*height[dst] != 1) ? width[dst]*height[dst] : 0;
+
+    // Copy the source region of the temporary buffer to the destination region of the cl buffer.
+    cl_int err = clEnqueueWriteBufferRect(queue,
+                                          buffer[dst],
+                                          CL_TRUE,
+                                          doffset,soffset,
+                                          /*sregion,*/dregion,
+                                          width[dst], dst_slice_pitch,
+                                          width[src], src_slice_pitch,
+                                          tmp_buffer,
+                                          0, NULL, NULL);
+    if (err != CL_SUCCESS) {
+        CL_EXIT_ERROR(err, "clEnqueueWriteBufferRect failed between %u and %u",(unsigned)src,(unsigned)dst);
+        // Do not mirror a write the device never performed.
+        return err;
+    }
+
+    // Copy from the temporary buffer to the host buffer.
+    size_t spitch = width[src];
+    size_t sslice = width[src]*height[src];
+    size_t dpitch = width[dst];
+    size_t dslice = width[dst]*height[dst];
+
+    size_t total = sregion[0] * sregion[1] * sregion[2];
+
+    // Number of elements in one slice of the region.
+    size_t rslice = sregion[0]*sregion[1];
+
+    for (size_t i = 0; i != total; ++i) {
+
+        // Compute the coordinates of the element within the region; they are
+        // the same on the source and the destination side.
+        size_t rz = i / rslice;
+        size_t ry = (i % rslice) / sregion[0];
+        size_t rx = (i % rslice) % sregion[0];
+
+        // Compute the element index within the temporary and destination buffers.
+        size_t s_idx = (soffset[2]+rz)*sslice + (soffset[1]+ry)*spitch + soffset[0]+rx;
+        size_t d_idx = (doffset[2]+rz)*dslice + (doffset[1]+ry)*dpitch + doffset[0]+rx;
+
+        verify[dst][d_idx] = tmp_buffer[s_idx];
+    }
+    return 0;
+}
+
+// Memory object destructor callback: frees the host pointer passed as user
+// data.  The memory object argument itself is not used.
+void CL_CALLBACK mem_obj_destructor_callback( cl_mem, void *data )
+{
+    free( data );
+}
+
+// Releases the state owned by test_bufferreadwriterect: the verify copies and
+// cl buffers of the first `count` test buffers, the temporary host buffer and
+// the random number generator.  The backing stores are not freed here; they
+// are freed by mem_obj_destructor_callback, which is registered on each buffer.
+static void release_rect_test_state(unsigned count)
+{
+    for (unsigned i = 0; i < count; ++i) {
+        free( verify[i] );
+        verify[i] = NULL;
+        clReleaseMemObject( buffer[i] );
+        buffer[i] = NULL;
+    }
+    free( tmp_buffer );
+    tmp_buffer = NULL;
+    free_mtdata(mt);
+    mt = NULL;
+}
+
+// This is the main test function for the conformance test.
+//
+// It creates TotalImages randomly sized cl buffers, each paired with a host
+// copy, performs num_tries randomly chosen rect copy / read / write
+// operations that are mirrored on the host copies, and finally compares
+// every cl buffer against its host copy.
+//
+// Returns 0 when everything matched, a non-zero value otherwise.  All
+// resources acquired by the test are released on every exit path.
+int
+test_bufferreadwriterect(cl_device_id device, cl_context context, cl_command_queue queue_, int num_elements)
+{
+    queue = queue_;
+    cl_int err = CL_SUCCESS;
+
+    // Forget any pointer left over from a previous run so that an early exit
+    // never frees it a second time.
+    tmp_buffer = NULL;
+    tmp_buffer_size = 0;
+
+    // Initialize the random number generator.
+    mt = init_genrand( gRandomSeed );
+
+    // Compute a maximum buffer size based on the number of test images and the device maximum.
+    cl_ulong max_mem_alloc_size = 0;
+    CL_EXIT_ERROR(clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &max_mem_alloc_size, NULL),"Could not get device info");
+    log_info("CL_DEVICE_MAX_MEM_ALLOC_SIZE = %llu bytes.\n", max_mem_alloc_size);
+
+    // Confirm that the maximum allocation size is not zero.  This also
+    // catches a failed clGetDeviceInfo, which leaves the value at zero.
+    if (max_mem_alloc_size == 0) {
+        log_error("Error: CL_DEVICE_MAX_MEM_ALLOC_SIZE is zero bytes\n");
+        release_rect_test_state(0);
+        return -1;
+    }
+
+    // Guess at a reasonable maximum dimension.
+    size_t max_mem_alloc_dim = (size_t)cbrt((double)(max_mem_alloc_size/sizeof(BufferType)))/alloc_scale;
+    if (max_mem_alloc_dim == 0) {
+        max_mem_alloc_dim = max_mem_alloc_size;
+    }
+
+    log_info("Using maximum dimension      = %lu.\n", max_mem_alloc_dim);
+
+    // Create pairs of cl buffers and host buffers on which operations will be mirrored.
+    log_info("Creating %u pairs of random sized host and cl buffers.\n", TotalImages);
+
+    size_t max_size = 0;
+    size_t total_bytes = 0;
+
+    for (unsigned i=0; i != TotalImages; ++i) {
+
+        // Determine a width, height and depth for this buffer.
+        size_t size_bytes = 0;
+        size_t tries = 0;
+        size_t max_tries = 1048576;
+        do {
+            width[i]   = get_random_size_t(1, max_mem_alloc_dim, mt);
+            height[i]  = get_random_size_t(1, max_mem_alloc_dim, mt);
+            depth[i]   = get_random_size_t(1, max_mem_alloc_dim, mt);
+            ++tries;
+        } while ((tries < max_tries) && (size_bytes = width[i]*height[i]*depth[i]*sizeof(BufferType)) > max_mem_alloc_size);
+
+        // Check to see if adequately sized buffers were found.
+        if (tries >= max_tries) {
+            log_error("Error: Could not find random buffer sized less than %llu bytes in %lu tries.\n",
+                      max_mem_alloc_size, max_tries);
+            release_rect_test_state(0);
+            return -1;
+        }
+
+        // Keep track of the dimensions of the largest buffer.
+        max_size = (size_bytes > max_size) ? size_bytes : max_size;
+        total_bytes += size_bytes;
+
+        log_info("Buffer[%u] is (%lu,%lu,%lu) = %lu MB (truncated)\n",i,width[i],height[i],depth[i],(size_bytes)/1048576);
+    }
+
+    log_info( "Total size: %lu MB (truncated)\n", total_bytes/1048576 );
+
+    // Allocate a temporary buffer for read and write operations.
+    tmp_buffer_size  = max_size;
+    tmp_buffer = (BufferType*)malloc(tmp_buffer_size);
+    if (tmp_buffer == NULL) {
+        log_error("Error: malloc of %lu byte temporary buffer failed\n", (unsigned long)tmp_buffer_size);
+        release_rect_test_state(0);
+        return -1;
+    }
+
+    // Initialize cl buffers
+    log_info( "Initializing buffers\n" );
+    for (unsigned i=0; i != TotalImages; ++i) {
+
+        size_t size_bytes = width[i]*height[i]*depth[i]*sizeof(BufferType);
+
+        // Allocate a host copy of the buffer for verification.
+        verify[i] = (BufferType*)malloc(size_bytes);
+        CL_EXIT_ERROR(verify[i] ? CL_SUCCESS : -1, "malloc of host buffer failed for buffer %u", i);
+
+        // Allocate the buffer in host memory.
+        backing[i] = (BufferType*)malloc(size_bytes);
+        CL_EXIT_ERROR(backing[i] ? CL_SUCCESS : -1, "malloc of backing buffer failed for buffer %u", i);
+
+        // Neither allocation may be used if it failed; release the partial
+        // pair by hand and everything created before it through the helper.
+        if (verify[i] == NULL || backing[i] == NULL) {
+            free( verify[i] );
+            verify[i] = NULL;
+            free( backing[i] );
+            backing[i] = NULL;
+            release_rect_test_state(i);
+            return -1;
+        }
+
+        // Generate a random buffer.
+        log_info( "Initializing buffer %u\n", i );
+        initialize_image(verify[i], width[i], height[i], depth[i], mt);
+
+        // Copy the image into a buffer which will passed to CL.
+        memcpy(backing[i], verify[i], size_bytes);
+
+        // Create the CL buffer.
+        buffer[i] = clCreateBuffer (context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE, size_bytes, backing[i], &err);
+        CL_EXIT_ERROR(err,"clCreateBuffer failed for buffer %u", i);
+        if (err != CL_SUCCESS) {
+            // No buffer exists, so no destructor callback will free the backing store.
+            free( verify[i] );
+            verify[i] = NULL;
+            free( backing[i] );
+            backing[i] = NULL;
+            release_rect_test_state(i);
+            return err;
+        }
+
+        // Make sure buffer is cleaned up appropriately if we encounter an error in the rest of the calls.
+        err = clSetMemObjectDestructorCallback( buffer[i], mem_obj_destructor_callback, backing[i] );
+        CL_EXIT_ERROR(err, "Unable to set mem object destructor callback" );
+        if (err != CL_SUCCESS) {
+            // The callback is not registered, so the backing store has to be
+            // freed by hand after the buffer has been released.
+            clReleaseMemObject( buffer[i] );
+            buffer[i] = NULL;
+            free( verify[i] );
+            verify[i] = NULL;
+            free( backing[i] );
+            backing[i] = NULL;
+            release_rect_test_state(i);
+            return err;
+        }
+    }
+
+    // Main test loop, run num_tries times.
+    log_info( "Executing %u test operations selected at random.\n", (unsigned)num_tries );
+    for (size_t iter = 0; iter < num_tries; ++iter) {
+
+        // Determine a source and a destination.
+        size_t src = get_random_size_t(0,TotalImages,mt);
+        size_t dst = get_random_size_t(0,TotalImages,mt);
+
+        // Determine the minimum dimensions.
+        size_t min_width = width[src] < width[dst] ? width[src] : width[dst];
+        size_t min_height = height[src] < height[dst] ? height[src] : height[dst];
+        size_t min_depth = depth[src] < depth[dst] ? depth[src] : depth[dst];
+
+        // Generate a random source rectangle within the minimum dimensions.
+        size_t mx = get_random_size_t(0, min_width-1, mt);
+        size_t my = get_random_size_t(0, min_height-1, mt);
+        size_t mz = get_random_size_t(0, min_depth-1, mt);
+
+        size_t sw = get_random_size_t(1, (min_width - mx), mt);
+        size_t sh = get_random_size_t(1, (min_height - my), mt);
+        size_t sd = get_random_size_t(1, (min_depth - mz), mt);
+
+        size_t sx = get_random_size_t(0, width[src]-sw, mt);
+        size_t sy = get_random_size_t(0, height[src]-sh, mt);
+        size_t sz = get_random_size_t(0, depth[src]-sd, mt);
+
+        size_t soffset[] = { sx, sy, sz };
+        size_t sregion[] = { sw, sh, sd };
+
+        // Generate a destination rectangle of the same size.
+        size_t dw = sw;
+        size_t dh = sh;
+        size_t dd = sd;
+
+        // Generate a random destination offset within the buffer.
+        size_t dx = get_random_size_t(0, (width[dst] - dw), mt);
+        size_t dy = get_random_size_t(0, (height[dst] - dh), mt);
+        size_t dz = get_random_size_t(0, (depth[dst] - dd), mt);
+        size_t doffset[] = { dx, dy, dz };
+        size_t dregion[] = { dw, dh, dd };
+
+        // Execute one of three operations:
+        // - Copy: Copies between src and dst within each set of host, buffer, and images.
+        // - Read & verify: Reads src region from buffer and image, and compares to host.
+        // - Write: Generates new buffer with src dimensions, and writes to cl buffer and image.
+
+        enum { TotalOperations = 3 };
+        size_t operation = get_random_size_t(0,TotalOperations,mt);
+
+        err = CL_SUCCESS;
+        switch (operation) {
+            case 0:
+                log_info("%lu Copy %lu offset (%lu,%lu,%lu) -> %lu offset (%lu,%lu,%lu) region (%lux%lux%lu = %lu)\n",
+                         iter,
+                         src, soffset[0], soffset[1], soffset[2],
+                         dst, doffset[0], doffset[1], doffset[2],
+                         sregion[0], sregion[1], sregion[2],
+                         sregion[0]*sregion[1]*sregion[2]);
+                err = copy_region(src, soffset, sregion, dst, doffset, dregion);
+                break;
+            case 1:
+                log_info("%lu Read %lu offset (%lu,%lu,%lu) -> %lu offset (%lu,%lu,%lu) region (%lux%lux%lu = %lu)\n",
+                         iter,
+                         src, soffset[0], soffset[1], soffset[2],
+                         dst, doffset[0], doffset[1], doffset[2],
+                         sregion[0], sregion[1], sregion[2],
+                         sregion[0]*sregion[1]*sregion[2]);
+                err = read_verify_region(src, soffset, sregion, dst, doffset, dregion);
+                break;
+            case 2:
+                log_info("%lu Write %lu offset (%lu,%lu,%lu) -> %lu offset (%lu,%lu,%lu) region (%lux%lux%lu = %lu)\n",
+                         iter,
+                         src, soffset[0], soffset[1], soffset[2],
+                         dst, doffset[0], doffset[1], doffset[2],
+                         sregion[0], sregion[1], sregion[2],
+                         sregion[0]*sregion[1]*sregion[2]);
+                err = write_region(src, soffset, sregion, dst, doffset, dregion);
+                break;
+            default:
+                // No operation selected; nothing to do for this iteration.
+                break;
+        }
+
+        if (err) {
+            release_rect_test_state(TotalImages);
+            return err;
+        }
+
+#if 0
+        // Uncomment this section to verify each operation.
+        // If commented out, verification won't occur until the end of the
+        // test, and it will not be possible to determine which operation failed.
+        log_info("Verify src %lu offset (%u,%u,%u) region (%lux%lux%lu)\n", src, 0, 0, 0, width[src], height[src], depth[src]);
+        if (err = map_verify_region(src))
+            return err;
+
+        log_info("Verify dst %lu offset (%u,%u,%u) region (%lux%lux%lu)\n", dst, 0, 0, 0, width[dst], height[dst], depth[dst]);
+        if (err = map_verify_region(dst))
+            return err;
+
+
+#endif
+
+    } // end main for loop.
+
+    for (unsigned i=0;i<TotalImages;++i) {
+        log_info("Verify %u offset (%u,%u,%u) region (%lux%lux%lu)\n", i, 0, 0, 0, width[i], height[i], depth[i]);
+        if ((err = map_verify_region(i))) {
+            release_rect_test_state(TotalImages);
+            return err;
+        }
+    }
+
+    // Clean-up.
+    release_rect_test_state(TotalImages);
+
+    if (!err) {
+        log_info("RECT read, write test passed\n");
+    }
+
+    return err;
+}
+
+
+
--- a/test_conformance/compatibility/test_conformance/basic/test_constant.c
+++ b/test_conformance/compatibility/test_conformance/basic/test_constant.c
@@ -0,0 +1,275 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../../test_common/harness/compat.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+
+#include "procs.h"
+
+// OpenCL C source of the kernel used by the first part of test_constant.
+// Each work-item reads one float and one int from the two __constant
+// buffers, converts the int to float and stores their product in out[tid].
+const char *constant_kernel_code =
+"__kernel void constant_kernel(__global float *out, __constant float *tmpF, __constant int *tmpI)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    float ftmp = tmpF[tid]; \n"
+"    float Itmp = tmpI[tid]; \n"
+"    out[tid] = ftmp * Itmp; \n"
+"}\n";
+
+// OpenCL C source of the kernel used by the second part of test_constant.
+// Every work-item sums the elements i_pos[0], i_pos[3], ..., i_pos[(num-1)*3]
+// of the constant buffer in a loop and stores the sum in out[tid], so all
+// work-items produce the same value.
+const char *loop_constant_kernel_code =
+"kernel void loop_constant_kernel(global float *out, constant float *i_pos, int num)\n"
+"{\n"
+"    int tid = get_global_id(0);\n"
+"    float sum = 0;\n"
+"    for (int i = 0; i < num; i++) {\n"
+"        float  pos  = i_pos[i*3];\n"
+"        sum += pos;\n"
+"    }\n"
+"    out[tid] = sum;\n"
+"}\n";
+
+
+// Checks the output of constant_kernel: out[k] must equal the float product
+// tmpF[k] * tmpI[k] for each of the first n elements.
+// Returns 0 (and logs a pass message) when all elements match, -1 (and logs a
+// failure) at the first mismatch.
+static int
+verify(cl_float *tmpF, cl_int *tmpI, cl_float *out, int n)
+{
+    for (int k = 0; k < n; ++k)
+    {
+        // Keep the product in a float so it is compared at float precision.
+        float expected = tmpF[k] * tmpI[k];
+
+        if( out[k] == expected )
+            continue;
+
+        log_error("CONSTANT test failed\n");
+        return -1;
+    }
+
+    log_info("CONSTANT test passed\n");
+    return 0;
+}
+
+
+// Checks the output of loop_constant_kernel: each of the first n elements of
+// out must equal the sum of tmp[0], tmp[3], ..., tmp[(l-1)*3], accumulated in
+// a float in that order.
+// Returns 0 (and logs a pass message) when all elements match, -1 (and logs a
+// failure) at the first mismatch.
+static int
+verify_loop_constant(const cl_float *tmp, cl_float *out, cl_int l, int n)
+{
+    for (int elem = 0; elem < n; ++elem)
+    {
+        float expected = 0;
+        cl_int term = 0;
+        while (term < l)
+        {
+            expected += tmp[term*3];
+            ++term;
+        }
+
+        if( out[elem] == expected )
+            continue;
+
+        log_error("loop CONSTANT test failed\n");
+        return -1;
+    }
+
+    log_info("loop CONSTANT test passed\n");
+    return 0;
+}
+
+// Tests reading from __constant buffers.
+//
+// The test sizes two constant buffers (one float, one int) from the device
+// limits, fills them with random values and runs two kernels:
+//   1. constant_kernel, which multiplies the two buffers element-wise, and
+//   2. loop_constant_kernel, which sums strided elements of the float buffer.
+// The output of each kernel is compared with a host computation.
+//
+// Returns 0 only when both verifications pass, a non-zero value otherwise.
+// All resources created by the test are released on every exit path taken
+// after they have been acquired.
+int
+test_constant(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+{
+    cl_mem          streams[3] = { NULL, NULL, NULL };
+    cl_int          *tmpI = NULL;
+    cl_float        *tmpF = NULL, *out = NULL;
+    cl_program      program = NULL;
+    cl_kernel       kernel = NULL;
+    cl_program      loop_program = NULL;
+    cl_kernel       loop_kernel = NULL;
+    cl_int          limit = 2;
+    size_t          global_threads[3];
+    int             err;
+    int             verify_err;
+    int             result = -1;
+    size_t          i;
+    cl_ulong        maxSize, maxGlobalSize, maxAllocSize;
+    size_t          num_floats, num_ints, constant_values;
+    MTdata          d;
+    RoundingMode    oldRoundMode;
+    int             isRTZ = 0;
+
+    /* Verify our test buffer won't be bigger than allowed */
+    err = clGetDeviceInfo( device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( maxSize ), &maxSize, 0 );
+    test_error( err, "Unable to get max constant buffer size" );
+
+    log_info("Device reports CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE %llu bytes.\n", maxSize);
+
+    // Limit test buffer size to 1/4 of CL_DEVICE_GLOBAL_MEM_SIZE
+    err = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(maxGlobalSize), &maxGlobalSize, 0);
+    test_error(err, "Unable to get CL_DEVICE_GLOBAL_MEM_SIZE");
+
+    if (maxSize > maxGlobalSize / 4)
+        maxSize = maxGlobalSize / 4;
+
+    err = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE , sizeof(maxAllocSize), &maxAllocSize, 0);
+    test_error(err, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE ");
+
+    if (maxSize > maxAllocSize)
+        maxSize = maxAllocSize;
+
+    // Use the same element count for the int and the float buffer: the
+    // smaller of the two counts that fit in a quarter of the limit.
+    maxSize/=4;
+    num_ints = (size_t)maxSize/sizeof(cl_int);
+    num_floats = (size_t)maxSize/sizeof(cl_float);
+    if (num_ints >= num_floats) {
+        constant_values = num_floats;
+    } else {
+        constant_values = num_ints;
+    }
+
+    log_info("Test will attempt to use %lu bytes with one %lu byte constant int buffer and one %lu byte constant float buffer.\n",
+             constant_values*sizeof(cl_int) + constant_values*sizeof(cl_float), constant_values*sizeof(cl_int), constant_values*sizeof(cl_float));
+
+    tmpI = (cl_int*)malloc(sizeof(cl_int) * constant_values);
+    tmpF = (cl_float*)malloc(sizeof(cl_float) * constant_values);
+    out  = (cl_float*)malloc(sizeof(cl_float) * constant_values);
+    if (!tmpI || !tmpF || !out)
+    {
+        log_error("malloc of host buffers failed\n");
+        goto cleanup;
+    }
+
+    // streams[0] receives the kernel output, streams[1] holds the float
+    // inputs and streams[2] the int inputs.
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * constant_values, NULL, NULL);
+    if (!streams[0])
+    {
+        log_error("clCreateBuffer failed\n");
+        goto cleanup;
+    }
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * constant_values, NULL, NULL);
+    if (!streams[1])
+    {
+        log_error("clCreateBuffer failed\n");
+        goto cleanup;
+    }
+    streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * constant_values, NULL, NULL);
+    if (!streams[2])
+    {
+        log_error("clCreateBuffer failed\n");
+        goto cleanup;
+    }
+
+    d = init_genrand( gRandomSeed );
+    for (i=0; i<constant_values; i++) {
+        tmpI[i] = (int)get_random_float(-0x02000000, 0x02000000, d);
+        tmpF[i] = get_random_float(-0x02000000, 0x02000000, d);
+    }
+    free_mtdata(d); d = NULL;
+
+    err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, sizeof(cl_float)*constant_values, (void *)tmpF, 0, NULL, NULL);
+    if (err != CL_SUCCESS)
+    {
+        log_error("clWriteArray failed\n");
+        goto cleanup;
+    }
+    err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, sizeof(cl_int)*constant_values, (void *)tmpI, 0, NULL, NULL);
+    if (err != CL_SUCCESS)
+    {
+        log_error("clWriteArray failed\n");
+        goto cleanup;
+    }
+
+    err = create_single_kernel_helper(context, &program, &kernel, 1, &constant_kernel_code, "constant_kernel" );
+    if (err) {
+        log_error("Failed to create kernel and program: %d\n", err);
+        goto cleanup;
+    }
+
+    err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
+    err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
+    err |= clSetKernelArg(kernel, 2, sizeof streams[2], &streams[2]);
+    if (err != CL_SUCCESS)
+    {
+        log_error("clSetKernelArgs failed\n");
+        goto cleanup;
+    }
+
+    global_threads[0] = constant_values;
+    err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_threads, NULL, 0, NULL, NULL );
+    if (err != CL_SUCCESS)
+    {
+        log_error("clEnqueueNDRangeKernel failed: %d\n", err);
+        goto cleanup;
+    }
+    err = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, 0, sizeof(cl_float)*constant_values, (void *)out, 0, NULL, NULL );
+    if (err != CL_SUCCESS)
+    {
+        log_error("clEnqueueReadBuffer failed\n");
+        goto cleanup;
+    }
+
+    //If we only support rtz mode
+    if( CL_FP_ROUND_TO_ZERO == get_default_rounding_mode(device) && gIsEmbedded)
+    {
+        oldRoundMode = set_round(kRoundTowardZero, kfloat);
+        isRTZ = 1;
+    }
+
+    // Remember this result separately: err is reused by the loop test below
+    // and must not hide a failure of the first verification.
+    verify_err = verify(tmpF, tmpI, out, (int)constant_values);
+
+    if (isRTZ)
+        (void)set_round(oldRoundMode, kfloat);
+
+    // Loop constant buffer test
+    memset(out, 0, sizeof(cl_float) * constant_values);
+    err = create_single_kernel_helper(context, &loop_program, &loop_kernel, 1,
+                                      &loop_constant_kernel_code, "loop_constant_kernel" );
+    if (err) {
+        log_error("Failed to create loop kernel and program: %d\n", err);
+        goto cleanup;
+    }
+
+    err = clSetKernelArg(loop_kernel, 0, sizeof streams[0], &streams[0]);
+    err |= clSetKernelArg(loop_kernel, 1, sizeof streams[1], &streams[1]);
+    err |= clSetKernelArg(loop_kernel, 2, sizeof(limit), &limit);
+    if (err != CL_SUCCESS) {
+        log_error("clSetKernelArgs for loop kernel failed\n");
+        goto cleanup;
+    }
+
+    err = clEnqueueNDRangeKernel( queue, loop_kernel, 1, NULL, global_threads, NULL, 0, NULL, NULL );
+    if (err != CL_SUCCESS) {
+        log_error("clEnqueueNDRangeKernel failed: %d\n", err);
+        goto cleanup;
+    }
+    err = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, 0, sizeof(cl_float)*constant_values, (void *)out, 0, NULL, NULL );
+    if (err != CL_SUCCESS) {
+        log_error("clEnqueueReadBuffer failed\n");
+        goto cleanup;
+    }
+
+    err = verify_loop_constant(tmpF, out, limit, (int)constant_values);
+
+    // The test passes only if both verifications passed.
+    result = (verify_err || err) ? -1 : 0;
+
+cleanup:
+    for (i = 0; i < 3; i++) {
+        if (streams[i])
+            clReleaseMemObject(streams[i]);
+    }
+    if (kernel)
+        clReleaseKernel(kernel);
+    if (program)
+        clReleaseProgram(program);
+    if (loop_kernel)
+        clReleaseKernel(loop_kernel);
+    if (loop_program)
+        clReleaseProgram(loop_program);
+    free(tmpI);
+    free(tmpF);
+    free(out);
+
+    return result;
+}
+
+
+
+
+
--- a/test_conformance/compatibility/test_conformance/basic/test_constant_source.cpp
+++ b/test_conformance/compatibility/test_conformance/basic/test_constant_source.cpp
@@ -0,0 +1,100 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../../test_common/harness/compat.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+
+#include "procs.h"
+
+const char *constant_source_kernel_code[] = { // one concatenated OpenCL C source string: program-scope __constant data plus constant_kernel
+"__constant int outVal = 42;\n"
+"__constant int outIndex = 7;\n"
+"__constant int outValues[ 16 ] = { 17, 01, 11, 12, 1955, 11, 5, 1985, 113, 1, 24, 1984, 7, 23, 1979, 97 };\n"
+"\n"
+"__kernel void constant_kernel( __global int *out )\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    if( tid == 0 )\n"   // work-item 0 writes out[0] (scalar constant) and out[1] (constant-indexed array element)
+"    {\n"
+"        out[ 0 ] = outVal;\n"
+"        out[ 1 ] = outValues[ outIndex ];\n"
+"    }\n"
+"    else\n"   // every other work-item copies outValues[tid] into out[tid + 1]
+"    {\n"
+"        out[ tid + 1 ] = outValues[ tid ];\n"
+"    }\n"
+"}\n" };
+
+int test_constant_source(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+{
+    // Slot 0 is outVal, slot 1 is outValues[ outIndex ], slots 2..16 are outValues[ 1..15 ] from work-items 1..15
+    cl_int           reference[ 17 ] = { 42, 1985, 01, 11, 12, 1955, 11, 5, 1985, 113, 1, 24, 1984, 7, 23, 1979, 97 };
+    cl_int           results[ 17 ];
+    size_t           globalSize[ 1 ] = { 16 };
+    clProgramWrapper program;
+    clKernelWrapper  kernel;
+    clMemWrapper     outStream;
+    cl_int           error;
+
+    // Compile the program and fetch the kernel under test
+    error = create_single_kernel_helper( context, &program, &kernel, 1, constant_source_kernel_code, "constant_kernel" );
+    test_error( error, "Unable to create testing kernel" );
+
+    // Device buffer that receives all 17 output values
+    outStream = clCreateBuffer( context, CL_MEM_WRITE_ONLY, sizeof( results ), NULL, &error );
+    test_error( error, "Unable to create output buffer" );
+
+    error = clSetKernelArg( kernel, 0, sizeof( outStream ), &outStream );
+    test_error( error, "Unable to set kernel argument" );
+
+    // Launch 16 work-items, then block until the results are back on the host
+    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, globalSize, NULL, 0, NULL, NULL );
+    test_error( error, "Unable to enqueue kernel" );
+    error = clEnqueueReadBuffer( queue, outStream, CL_TRUE, 0, sizeof( results ), results, 0, NULL, NULL );
+    test_error( error, "Unable to read results" );
+
+    // Report only the first mismatch; the message names the kind of access that produced it
+    for( int slot = 0; slot < 17; slot++ )
+    {
+        if( reference[ slot ] == results[ slot ] )
+            continue;
+        switch( slot )
+        {
+            case 0:
+                log_error( "ERROR: Output value %d from constant source global did not validate! (Expected %d, got %d)\n", slot, reference[ slot ], results[ slot ] );
+                break;
+            case 1:
+                log_error( "ERROR: Output value %d from constant-indexed constant array did not validate! (Expected %d, got %d)\n", slot, reference[ slot ], results[ slot ] );
+                break;
+            default:
+                log_error( "ERROR: Output value %d from variable-indexed constant array did not validate! (Expected %d, got %d)\n", slot, reference[ slot ], results[ slot ] );
+                break;
+        }
+        return -1;
+    }
+
+    return 0;
+}
+
+
+
+
+
--- a/test_conformance/compatibility/test_conformance/basic/test_createkernelsinprogram.c
+++ b/test_conformance/compatibility/test_conformance/basic/test_createkernelsinprogram.c
@@ -0,0 +1,121 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../../test_common/harness/compat.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+
+#include "procs.h"
+
+const char *sample_single_kernel = { // OpenCL C program containing exactly one kernel, sample_test
+"__kernel void sample_test(__global float *src, __global int *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = (int)src[tid];\n"
+"\n"
+"}\n"};
+
+const char *sample_double_kernel = { // OpenCL C program containing two kernels, sample_test and sample_test2
+"__kernel void sample_test(__global float *src, __global int *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = (int)src[tid];\n"
+"\n"
+"}\n"
+"__kernel void sample_test2(__global float *src, __global int *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = (int)src[tid];\n"
+"\n"
+"}\n"};
+
+
+// Builds `source` as a program, asks clCreateKernelsInProgram for `expected`
+// kernels and checks that exactly `expected` are reported back. Everything
+// created here is released on every path, so a failing step leaks nothing.
+// Logs `fail_msg` when the kernel-creation check fails.
+// Returns 0 on success, -1 on any failure.
+static int
+check_kernels_in_program(cl_context context, const char *source, cl_uint expected, const char *fail_msg)
+{
+    cl_program      program;
+    cl_kernel       kernel[2] = { NULL, NULL };
+    cl_uint         num_kernels = 0;
+    cl_uint         i;
+    size_t          length = strlen(source);
+    int             err;
+    int             result = -1;
+
+    program = clCreateProgramWithSource(context, 1, &source, &length, NULL);
+    if (!program)
+    {
+        log_error("clCreateProgramWithSource failed\n");
+        return -1;
+    }
+
+    err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
+    if (err != CL_SUCCESS)
+    {
+        log_error("clBuildProgram failed\n");
+        clReleaseProgram(program);
+        return -1;
+    }
+
+    err = clCreateKernelsInProgram(program, expected, kernel, &num_kernels);
+    if ( (err != CL_SUCCESS) || (num_kernels != expected) )
+        log_error("%s", fail_msg);
+    else
+        result = 0;
+
+    // Kernel objects exist only if the call succeeded; never walk past kernel[].
+    if (err == CL_SUCCESS)
+    {
+        for (i = 0; i < num_kernels && i < 2; i++)
+            clReleaseKernel(kernel[i]);
+    }
+    clReleaseProgram(program);
+
+    return result;
+}
+
+// Checks clCreateKernelsInProgram against a one-kernel program and then a
+// two-kernel program. The device, queue and num_elements arguments are unused.
+// Returns 0 if both checks pass, -1 otherwise.
+int
+test_createkernelsinprogram(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+{
+    if (check_kernels_in_program(context, sample_single_kernel, 1,
+                                 "clCreateKernelsInProgram test failed for a single kernel\n") != 0)
+        return -1;
+
+    if (check_kernels_in_program(context, sample_double_kernel, 2,
+                                 "clCreateKernelsInProgram test failed for two kernels\n") != 0)
+        return -1;
+
+    log_info("clCreateKernelsInProgram test passed\n");
+    return 0;
+}
+
+
+
+
+
--- a/test_conformance/compatibility/test_conformance/basic/test_enqueue_map.cpp
+++ b/test_conformance/compatibility/test_conformance/basic/test_enqueue_map.cpp
@@ -0,0 +1,253 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../../test_common/harness/compat.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+
+#include "procs.h"
+#include "../../test_common/harness/conversions.h"
+#include "../../test_common/harness/typeWrappers.h"
+
+const cl_mem_flags flag_set[] = { // memory-object creation flag combinations iterated by both map tests (indices 0..4)
+  CL_MEM_ALLOC_HOST_PTR,
+  CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
+  CL_MEM_USE_HOST_PTR,
+  CL_MEM_COPY_HOST_PTR,
+  0 // no host-pointer related flag at all
+};
+const char* flag_set_names[] = { // printable name of each flag_set entry, index-for-index, used in the per-iteration log line
+  "CL_MEM_ALLOC_HOST_PTR",
+  "CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR",
+  "CL_MEM_USE_HOST_PTR",
+  "CL_MEM_COPY_HOST_PTR",
+  "0"
+};
+
+// Runs the map/write/unmap/verify cycle once for every entry of flag_set.
+// hostPtrData initialises the buffer; referenceData is an independent host
+// mirror, because a CL_MEM_USE_HOST_PTR mapping may alias hostPtrData. The
+// caller owns the arrays and d, so the early returns here leak nothing.
+// Returns 0 on success, non-zero on failure.
+static int run_map_buffer_checks(cl_context context, cl_command_queue queue, MTdata d, const size_t bufferSize,
+                                 cl_char *hostPtrData, cl_char *referenceData, cl_char *finalData)
+{
+    int error;
+
+    for (int src_flag_id=0; src_flag_id < 5; src_flag_id++)
+    {
+        clMemWrapper memObject;
+        const cl_mem_flags flags = flag_set[src_flag_id];
+        const bool hasHostPtr = (flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR)) != 0;
+        log_info("Testing with cl_mem_flags src: %s\n", flag_set_names[src_flag_id]);
+
+        generate_random_data( kChar, (unsigned int)bufferSize, d, hostPtrData );
+        memcpy( referenceData, hostPtrData, bufferSize );
+
+        memObject = clCreateBuffer(context, flags, bufferSize * sizeof( cl_char ), hasHostPtr ? hostPtrData : NULL, &error);
+        test_error( error, "Unable to create testing buffer" );
+
+        if (!hasHostPtr)
+        {
+            error = clEnqueueWriteBuffer(queue, memObject, CL_TRUE, 0, bufferSize * sizeof( cl_char ), hostPtrData, 0, NULL, NULL);
+            test_error( error, "clEnqueueWriteBuffer failed");
+        }
+
+        for( int i = 0; i < 128; i++ )
+        {
+          size_t offset = (size_t)random_in_range( 0, (int)bufferSize - 1, d );
+          size_t length = (size_t)random_in_range( 1, (int)( bufferSize - offset ), d );
+          cl_char *mappedRegion = (cl_char *)clEnqueueMapBuffer( queue, memObject, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE,
+                                                                offset, length, 0, NULL, NULL, &error );
+          if( error != CL_SUCCESS )
+          {
+            print_error( error, "clEnqueueMapBuffer call failed" );
+            log_error( "\tOffset: %d  Length: %d\n", (int)offset, (int)length );
+            return -1;
+          }
+
+          // Apply the same read-modify-write through the mapping and to the mirror
+          for( size_t j = 0; j < length; j++ )
+          {
+            cl_char spin = (cl_char)genrand_int32( d );
+            mappedRegion[ j ] = (cl_char)( spin - mappedRegion[ j ] );
+            referenceData[ offset + j ] = (cl_char)( spin - referenceData[ offset + j ] );
+          }
+
+          error = clEnqueueUnmapMemObject( queue, memObject, mappedRegion, 0, NULL, NULL );
+          test_error( error, "Unable to unmap buffer" );
+        }
+
+        // Final validation: read actual values of buffer and compare against our reference
+        error = clEnqueueReadBuffer( queue, memObject, CL_TRUE, 0, sizeof( cl_char ) * bufferSize, finalData, 0, NULL, NULL );
+        test_error( error, "Unable to read results" );
+
+        for( size_t q = 0; q < bufferSize; q++ )
+            if( referenceData[ q ] != finalData[ q ] )
+            {
+                log_error( "ERROR: Sample %d did not validate! Got %d, expected %d\n", (int)q, (int)finalData[ q ], (int)referenceData[ q ] );
+                return -1;
+            }
+    } // cl_mem flags
+
+    return 0;
+}
+
+// Tests clEnqueueMapBuffer/clEnqueueUnmapMemObject on a 64 KiB buffer for each flag_set entry.
+// Owns the host arrays and the random generator so they are released on every exit path.
+int test_enqueue_map_buffer(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    const size_t bufferSize = 256*256;
+    int result = -1;
+    MTdata d = init_genrand( gRandomSeed );
+    cl_char *hostPtrData = (cl_char*)malloc(bufferSize);
+    cl_char *referenceData = (cl_char*)malloc(bufferSize);
+    cl_char *finalData = (cl_char*)malloc(bufferSize);
+
+    if (hostPtrData != NULL && referenceData != NULL && finalData != NULL)
+        result = run_map_buffer_checks(context, queue, d, bufferSize, hostPtrData, referenceData, finalData);
+    else
+        log_error("ERROR: Unable to allocate host buffers for test_enqueue_map_buffer\n");
+    free( hostPtrData );
+    free( referenceData );
+    free( finalData );
+    free_mtdata(d);
+    return result;
+}
+
+// Runs the map/write/unmap/verify cycle on a 2D image once per flag_set entry.
+// hostPtrData initialises the image; referenceData is an independent host
+// mirror, because a CL_MEM_USE_HOST_PTR mapping may alias hostPtrData. The
+// caller owns the arrays and d, so the early returns here leak nothing.
+// Returns 0 on success, non-zero on failure.
+static int run_map_image_checks(cl_context context, cl_command_queue queue, MTdata d, cl_image_format *format,
+                                const size_t imageSize, cl_uint *hostPtrData, cl_uint *referenceData, cl_uint *finalData)
+{
+  int error;
+  const size_t channelCount = imageSize * imageSize * 4;   // four cl_uint channels per RGBA pixel
+
+  for (int src_flag_id=0; src_flag_id < 5; src_flag_id++) {
+    clMemWrapper memObject;
+    const cl_mem_flags flags = flag_set[src_flag_id];
+    const bool hasHostPtr = (flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR)) != 0;
+    log_info("Testing with cl_mem_flags src: %s\n", flag_set_names[src_flag_id]);
+
+    // Randomise all four channels of every pixel so no uninitialised memory is ever compared
+    generate_random_data( kUInt, (unsigned int)channelCount, d, hostPtrData );
+    memcpy( referenceData, hostPtrData, channelCount * sizeof( cl_uint ) );
+
+    memObject = create_image_2d( context, CL_MEM_READ_WRITE | flags, format,
+                                imageSize, imageSize, 0, hasHostPtr ? hostPtrData : NULL, &error );
+    test_error( error, "Unable to create testing buffer" );
+
+    if (!hasHostPtr) {
+      size_t write_origin[3]={0,0,0}, write_region[3]={imageSize, imageSize, 1};
+      error = clEnqueueWriteImage(queue, memObject, CL_TRUE, write_origin, write_region, 0, 0, hostPtrData, 0, NULL, NULL);
+      test_error( error, "Unable to write to testing buffer" );
+    }
+
+    for( int i = 0; i < 128; i++ )
+    {
+      size_t offset[3], region[3];
+      size_t rowPitch;
+
+      offset[ 0 ] = (size_t)random_in_range( 0, (int)imageSize - 1, d );
+      region[ 0 ] = (size_t)random_in_range( 1, (int)( imageSize - offset[ 0 ] - 1), d );
+      offset[ 1 ] = (size_t)random_in_range( 0, (int)imageSize - 1, d );
+      region[ 1 ] = (size_t)random_in_range( 1, (int)( imageSize - offset[ 1 ] - 1), d );
+      offset[ 2 ] = 0;
+      region[ 2 ] = 1;
+      cl_uint *mappedRegion = (cl_uint *)clEnqueueMapImage( queue, memObject, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE,
+                                                           offset, region, &rowPitch, NULL, 0, NULL, NULL, &error );
+      if( error != CL_SUCCESS )
+      {
+        print_error( error, "clEnqueueMapImage call failed" );
+        log_error( "\tOffset: %d,%d  Region: %d,%d\n", (int)offset[0], (int)offset[1], (int)region[0], (int)region[1] );
+        return -1;
+      }
+
+      // Read-modify-write each mapped channel and mirror it in the reference; unsigned math keeps wrap-around defined
+      for( size_t y = 0; y < region[ 1 ]; y++ )
+      {
+        cl_uint *mappedRow = mappedRegion + ( y * rowPitch/sizeof(cl_uint) );
+        cl_uint *referenceRow = referenceData + ( ( offset[ 1 ] + y ) * imageSize + offset[ 0 ] ) * 4;
+        for( size_t x = 0; x < region[ 0 ] * 4; x++ )
+        {
+          cl_uint spin = (cl_uint)random_in_range( 16, 1024, d );
+          mappedRow[ x ] = spin - mappedRow[ x ];
+          referenceRow[ x ] = spin - referenceRow[ x ];
+        }
+      }
+
+      error = clEnqueueUnmapMemObject( queue, memObject, mappedRegion, 0, NULL, NULL );
+      test_error( error, "Unable to unmap buffer" );
+    }
+
+    // Final validation: read actual values of buffer and compare against our reference
+    size_t finalOrigin[3] = { 0, 0, 0 }, finalRegion[3] = { imageSize, imageSize, 1 };
+    error = clEnqueueReadImage( queue, memObject, CL_TRUE, finalOrigin, finalRegion, 0, 0, finalData, 0, NULL, NULL );
+    test_error( error, "Unable to read results" );
+
+    for( size_t q = 0; q < channelCount; q++ )
+      if( referenceData[ q ] != finalData[ q ] )
+      {
+        log_error( "ERROR: Sample %d (coord %d,%d) did not validate! Got %d, expected %d\n", (int)q, (int)( ( q / 4 ) % imageSize ), (int)( ( q / 4 ) / imageSize ),
+                                    (int)finalData[ q ], (int)referenceData[ q ] );
+        return -1;
+      }
+  } // cl_mem_flags
+
+  return 0;
+}
+
+// Tests clEnqueueMapImage/clEnqueueUnmapMemObject on a 256x256 CL_RGBA / CL_UNSIGNED_INT32 image per flag_set entry.
+// Owns the host arrays and the random generator so they are released on every exit path.
+int test_enqueue_map_image(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    cl_image_format format = { CL_RGBA, CL_UNSIGNED_INT32 };
+    const size_t imageSize = 256;
+    const size_t imageBytes = imageSize * imageSize * 4 * sizeof(cl_uint);
+    int result = -1;
+
+    PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID )
+
+    if( !is_image_format_supported( context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE2D, &format ) )
+    {
+        log_error( "ERROR: Test requires basic OpenCL 1.0 format CL_RGBA:CL_UNSIGNED_INT32, which is unsupported by this device!\n" );
+        return -1;
+    }
+
+    MTdata d = init_genrand( gRandomSeed );
+    cl_uint *hostPtrData = (cl_uint*)malloc(imageBytes);
+    cl_uint *referenceData = (cl_uint*)malloc(imageBytes);
+    cl_uint *finalData = (cl_uint*)malloc(imageBytes);
+
+    if (hostPtrData != NULL && referenceData != NULL && finalData != NULL)
+        result = run_map_image_checks(context, queue, d, &format, imageSize, hostPtrData, referenceData, finalData);
+    else
+        log_error("ERROR: Unable to allocate host buffers for test_enqueue_map_image\n");
+
+    free(hostPtrData);
+    free(referenceData);
+    free(finalData);
+    free_mtdata(d);
+    return result;
+}
+
+
--- a/test_conformance/compatibility/test_conformance/basic/test_explicit_s2v.cpp
+++ b/test_conformance/compatibility/test_conformance/basic/test_explicit_s2v.cpp
@@ -0,0 +1,384 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../../test_common/harness/compat.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+
+#include "procs.h"
+#include "../../test_common/harness/conversions.h"
+#include "../../test_common/harness/typeWrappers.h"
+
+#define DECLARE_S2V_IDENT_KERNEL(srctype,dsttype,size) /* OpenCL C source of a test_conversion kernel that casts each scalar srctype input to a dsttype<size> vector; srctype and dsttype must already be string literals */ \
+"__kernel void test_conversion(__global " srctype " *sourceValues, __global " dsttype #size " *destValues )\n"        \
+"{\n"                                                                            \
+"    int  tid = get_global_id(0);\n"                                        \
+"    " srctype "  src = sourceValues[tid];\n"                                        \
+"\n"                                                                            \
+"    destValues[tid] = (" dsttype #size ")src;\n"                        \
+"\n"                                                                            \
+"}\n"
+
+#define DECLARE_S2V_IDENT_KERNELS(srctype,dsttype) /* brace-enclosed list of kernel sources for vector sizes 2, 4, 8 and 16; srctype is passed through as a string, dsttype is stringized here */ \
+{        \
+DECLARE_S2V_IDENT_KERNEL(srctype,#dsttype,2), \
+DECLARE_S2V_IDENT_KERNEL(srctype,#dsttype,4), \
+DECLARE_S2V_IDENT_KERNEL(srctype,#dsttype,8), \
+DECLARE_S2V_IDENT_KERNEL(srctype,#dsttype,16) \
+}
+
+#define DECLARE_EMPTY { NULL, NULL, NULL, NULL, NULL } /* placeholder row: five NULL slots, i.e. no kernel source for any vector size */
+
+/* Note: the type lists in the macros and the array below must match the ExplicitTypes enum in conversions.h in both order and size! */
+
+#define DECLARE_S2V_IDENT_KERNELS_SET(srctype) /* for one source type: a kernel list per destination type (14 listed types) followed by one empty row; srctype is stringized here */   \
+{                                                    \
+DECLARE_S2V_IDENT_KERNELS(#srctype,bool),            \
+            DECLARE_S2V_IDENT_KERNELS(#srctype,char),            \
+            DECLARE_S2V_IDENT_KERNELS(#srctype,uchar),            \
+            DECLARE_S2V_IDENT_KERNELS(#srctype,unsigned char),    \
+DECLARE_S2V_IDENT_KERNELS(#srctype,short),            \
+DECLARE_S2V_IDENT_KERNELS(#srctype,ushort),            \
+DECLARE_S2V_IDENT_KERNELS(#srctype,unsigned short),    \
+DECLARE_S2V_IDENT_KERNELS(#srctype,int),                \
+DECLARE_S2V_IDENT_KERNELS(#srctype,uint),            \
+DECLARE_S2V_IDENT_KERNELS(#srctype,unsigned int),    \
+DECLARE_S2V_IDENT_KERNELS(#srctype,long),            \
+DECLARE_S2V_IDENT_KERNELS(#srctype,ulong),            \
+DECLARE_S2V_IDENT_KERNELS(#srctype,unsigned long),    \
+DECLARE_S2V_IDENT_KERNELS(#srctype,float),            \
+DECLARE_EMPTY                                        \
+}
+
+#define DECLARE_EMPTY_SET /* fifteen empty rows: a source type for which no kernel exists at all */               \
+{                                                    \
+DECLARE_EMPTY, \
+DECLARE_EMPTY, \
+DECLARE_EMPTY, \
+DECLARE_EMPTY, \
+DECLARE_EMPTY, \
+DECLARE_EMPTY, \
+DECLARE_EMPTY, \
+DECLARE_EMPTY, \
+DECLARE_EMPTY, \
+DECLARE_EMPTY, \
+DECLARE_EMPTY, \
+DECLARE_EMPTY, \
+DECLARE_EMPTY, \
+DECLARE_EMPTY, \
+DECLARE_EMPTY    \
+}
+
+
+/* The overall array: kernel source indexed by [source type][destination type][vector-size slot]; a NULL entry means there is no kernel for that combination */
+const char * kernel_explicit_s2v_set[kNumExplicitTypes][kNumExplicitTypes][5] = {
+    DECLARE_S2V_IDENT_KERNELS_SET(bool),
+    DECLARE_S2V_IDENT_KERNELS_SET(char),
+    DECLARE_S2V_IDENT_KERNELS_SET(uchar),
+    DECLARE_S2V_IDENT_KERNELS_SET(unsigned char),
+    DECLARE_S2V_IDENT_KERNELS_SET(short),
+    DECLARE_S2V_IDENT_KERNELS_SET(ushort),
+    DECLARE_S2V_IDENT_KERNELS_SET(unsigned short),
+    DECLARE_S2V_IDENT_KERNELS_SET(int),
+    DECLARE_S2V_IDENT_KERNELS_SET(uint),
+    DECLARE_S2V_IDENT_KERNELS_SET(unsigned int),
+    DECLARE_S2V_IDENT_KERNELS_SET(long),
+    DECLARE_S2V_IDENT_KERNELS_SET(ulong),
+    DECLARE_S2V_IDENT_KERNELS_SET(unsigned long),
+    DECLARE_S2V_IDENT_KERNELS_SET(float),
+    DECLARE_EMPTY_SET
+};
+
+// Compiles programSrc, runs its test_conversion kernel on `count` scalars of srcType read from
+// inputData, and checks that every lane of each resulting destType vector (vecSize lanes) matches
+// the host-side conversion of the input. A NULL programSrc means "nothing to test" and passes.
+// Returns 0 on success and non-zero on failure.
+int test_explicit_s2v_function(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *programSrc,
+                               ExplicitType srcType, unsigned int count, ExplicitType destType, unsigned int vecSize, void *inputData )
+{
+    clProgramWrapper program;
+    clKernelWrapper kernel;
+    int error;
+    clMemWrapper streams[2];
+    unsigned char convertedData[ 8 ];    /* Max type size is 8 bytes */
+    size_t threadSize[3], groupSize[3];
+    unsigned int i, s;
+    int result = 0;
+    const char* finalProgramSrc[2] = { "" /* optional pragma */, programSrc };
+
+    if( programSrc == NULL )
+        return 0;
+
+    if (srcType == kDouble || destType == kDouble)
+        finalProgramSrc[0] = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
+
+    const size_t paramSize = get_explicit_type_size( srcType );
+    const size_t destTypeSize = get_explicit_type_size( destType );
+    const size_t destStride = destTypeSize * vecSize;
+    const size_t outBytes = destStride * count;
+
+    if( create_single_kernel_helper( context, &program, &kernel, 2, finalProgramSrc, "test_conversion" ) )
+    {
+        log_info( "****** %s%s *******\n", finalProgramSrc[0], finalProgramSrc[1] );
+        return -1;
+    }
+
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), paramSize * count, inputData, &error);
+    test_error( error, "clCreateBuffer failed");
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  outBytes, NULL, &error);
+    test_error( error, "clCreateBuffer failed");
+
+    /* Set the arguments */
+    error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0] );
+    test_error( error, "Unable to set indexed kernel arguments" );
+    error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1] );
+    test_error( error, "Unable to set indexed kernel arguments" );
+
+    /* Run the kernel */
+    threadSize[0] = count;
+    error = get_max_common_work_group_size( context, kernel, threadSize[0], &groupSize[0] );
+    test_error( error, "Unable to get work group size to use" );
+    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threadSize, groupSize, 0, NULL, NULL );
+    test_error( error, "Unable to execute test kernel" );
+
+    /* Allocate the host copy only now, so none of the early returns above can leak it */
+    unsigned char *outData = (unsigned char *)malloc( outBytes );
+    if( outData == NULL )
+    {
+        log_error( "ERROR: Unable to allocate host memory for output values!\n" );
+        return -1;
+    }
+
+    error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, outBytes, outData, 0, NULL, NULL );
+    if( error != CL_SUCCESS )
+    {
+        print_error( error, "Unable to read output values!" );
+        free( outData );
+        return error;
+    }
+
+    unsigned char *inPtr = (unsigned char *)inputData;
+    unsigned char *outPtr = outData;
+    for( i = 0; i < count && result == 0; i++, inPtr += paramSize, outPtr += destStride )
+    {
+        convert_explicit_value( (void *)inPtr, (void *)convertedData, srcType, false, kDefaultRoundingType, destType );
+        for( s = 0; s < vecSize; s++ )
+        {
+            if( memcmp( convertedData, outPtr + destTypeSize * s, destTypeSize ) == 0 )
+                continue;
+            /* For the report, copy only bytes that exist: at most 4 of the input, at most 16 of the output */
+            unsigned int inWord = 0, p[ 4 ] = { 0, 0, 0, 0 };
+            const size_t outLeft = outBytes - (size_t)( outPtr - outData );
+            memcpy( &inWord, inPtr, paramSize < sizeof( inWord ) ? paramSize : sizeof( inWord ) );
+            memcpy( p, outPtr, outLeft < sizeof( p ) ? outLeft : sizeof( p ) );
+            log_error( "ERROR: Output value %d:%d does not validate for size %d:%d!\n", i, s, vecSize, (int)destTypeSize );
+            log_error( "       Input:   0x%0*x\n", (int)( paramSize * 2 ), inWord );
+            log_error( "       Actual:  0x%08x 0x%08x 0x%08x 0x%08x\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] );
+            result = -1;
+            break;
+        }
+    }
+
+    free( outData );
+    return result;
+}
+
+// Runs the scalar-to-vector cast test from srcType against each destination type. Only the pairing whose
+// destination equals the source, and whose type name contains no space, is executed - once per vector
+// size 2, 4, 8 and 16. Returns 0 if nothing failed, -1 otherwise.
+int test_explicit_s2v_function_set(cl_device_id deviceID, cl_context context, cl_command_queue queue, ExplicitType srcType,
+                                   unsigned int count, void *inputData )
+{
+    const unsigned int vectorWidths[] = { 2, 4, 8, 16, 0 };   // zero-terminated
+    int status = 0;
+
+    for( int dst = kBool; dst < kNumExplicitTypes; dst++ )
+    {
+        const ExplicitType dstType = (ExplicitType)dst;
+        if( dst == kDouble && !is_extension_available( deviceID, "cl_khr_fp64" ) )
+            continue;
+        if( ( dst == kLong || dst == kULong ) && !gHasLong )
+            continue;
+
+        for( int w = 0; vectorWidths[ w ] != 0; w++ )
+        {
+            if( dst != srcType ||
+                strchr( get_explicit_type_name( (ExplicitType)srcType ), ' ' ) != NULL ||
+                strchr( get_explicit_type_name( dstType ), ' ' ) != NULL )
+                continue;
+            if( test_explicit_s2v_function( deviceID, context, queue, kernel_explicit_s2v_set[ srcType ][ dst ][ w ],
+                                           srcType, count, dstType, vectorWidths[ w ], inputData ) == 0 )
+                continue;
+
+            log_error( "ERROR: Explicit cast of scalar %s to vector %s%d FAILED; skipping other %s vector tests\n",
+                      get_explicit_type_name(srcType), get_explicit_type_name(dstType), vectorWidths[ w ], get_explicit_type_name(dstType) );
+            status = -1;
+            break;
+        }
+    }
+
+    return status;
+}
+
+int test_explicit_s2v_bool(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    log_info( "NOTE: Boolean vectors not defined in OpenCL 1.0. Skipping test.\n" );
+    return 0; // always reports success; nothing is run on the device
+#if 0 // compiled out: sketch of the would-be bool test (its generate_random_data call passes no random-generator argument, unlike the sibling tests)
+    bool    data[128];
+
+    generate_random_data( kBool, 128, data );
+
+    return test_explicit_s2v_function_set( deviceID, context, queue, kBool, 128, data );
+#endif
+}
+
+int test_explicit_s2v_char(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    // Feed 128 random char scalars to the scalar-to-vector cast tests
+    RandomSeed rng(gRandomSeed);
+    char       samples[128];
+
+    generate_random_data( kChar, 128, rng, samples );
+    return test_explicit_s2v_function_set( deviceID, context, queue, kChar, 128, samples );
+}
+
+int test_explicit_s2v_uchar(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    // One set of 128 random bytes is run under both names of the type;
+    // the second run is skipped if the first one fails.
+    RandomSeed    rng(gRandomSeed);
+    unsigned char samples[128];
+
+    generate_random_data( kUChar, 128, rng, samples );
+
+    const bool failed = test_explicit_s2v_function_set( deviceID, context, queue, kUChar, 128, samples ) != 0 ||
+                        test_explicit_s2v_function_set( deviceID, context, queue, kUnsignedChar, 128, samples ) != 0;
+    return failed ? -1 : 0;
+}
+
+int test_explicit_s2v_short(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    // Feed 128 random short scalars to the scalar-to-vector cast tests
+    RandomSeed rng(gRandomSeed);
+    short      samples[128];
+
+    generate_random_data( kShort, 128, rng, samples );
+
+    const int status = test_explicit_s2v_function_set( deviceID, context, queue, kShort, 128, samples );
+    return ( status != 0 ) ? -1 : 0;
+}
+
+int test_explicit_s2v_ushort(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    // One set of 128 random values is run under both names of the type;
+    // the second run is skipped if the first one fails.
+    RandomSeed     rng(gRandomSeed);
+    unsigned short samples[128];
+
+    generate_random_data( kUShort, 128, rng, samples );
+
+    const bool failed = test_explicit_s2v_function_set( deviceID, context, queue, kUShort, 128, samples ) != 0 ||
+                        test_explicit_s2v_function_set( deviceID, context, queue, kUnsignedShort, 128, samples ) != 0;
+    return failed ? -1 : 0;
+}
+
+int test_explicit_s2v_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    // Feed 128 random int scalars to the scalar-to-vector cast tests
+    RandomSeed rng(gRandomSeed);
+    int        samples[128];
+
+    generate_random_data( kInt, 128, rng, samples );
+
+    const int status = test_explicit_s2v_function_set( deviceID, context, queue, kInt, 128, samples );
+    return ( status != 0 ) ? -1 : 0;
+}
+
+int test_explicit_s2v_uint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    // One set of 128 random values is run under both names of the type;
+    // the second run is skipped if the first one fails.
+    RandomSeed   rng(gRandomSeed);
+    unsigned int samples[128];
+
+    generate_random_data( kUInt, 128, rng, samples );
+
+    const bool failed = test_explicit_s2v_function_set( deviceID, context, queue, kUInt, 128, samples ) != 0 ||
+                        test_explicit_s2v_function_set( deviceID, context, queue, kUnsignedInt, 128, samples ) != 0;
+    return failed ? -1 : 0;
+}
+
+int test_explicit_s2v_long(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    // Feed 128 random cl_long scalars to the scalar-to-vector cast tests
+    RandomSeed rng(gRandomSeed);
+    cl_long    samples[128];
+
+    generate_random_data( kLong, 128, rng, samples );
+
+    const int status = test_explicit_s2v_function_set( deviceID, context, queue, kLong,  128, samples );
+    return ( status != 0 ) ? -1 : 0;
+}
+
+int test_explicit_s2v_ulong(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    // One set of 128 random values is run under both names of the type;
+    // the second run is skipped if the first one fails.
+    RandomSeed rng(gRandomSeed);
+    cl_ulong   samples[128];
+
+    generate_random_data( kULong, 128, rng, samples );
+
+    const bool failed = test_explicit_s2v_function_set( deviceID, context, queue, kULong,  128, samples ) != 0 ||
+                        test_explicit_s2v_function_set( deviceID, context, queue, kUnsignedLong, 128, samples ) != 0;
+    return failed ? -1 : 0;
+}
+
+int test_explicit_s2v_float(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    // Feed 128 random float scalars to the scalar-to-vector cast tests
+    RandomSeed rng(gRandomSeed);
+    float      samples[128];
+
+    generate_random_data( kFloat, 128, rng, samples );
+
+    const int status = test_explicit_s2v_function_set( deviceID, context, queue, kFloat, 128, samples );
+    return ( status != 0 ) ? -1 : 0;
+}
+
+
+int test_explicit_s2v_double(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    // Feed 128 random double scalars to the cast tests, but only on devices that report cl_khr_fp64
+    RandomSeed rng(gRandomSeed);
+    double     samples[128];
+
+    if( !is_extension_available( deviceID, "cl_khr_fp64" ) )
+    {
+        log_info("Extension cl_khr_fp64 not supported. Skipping test.\n");
+        return 0;
+    }
+
+    generate_random_data( kDouble, 128, rng, samples );
+
+    return ( test_explicit_s2v_function_set( deviceID, context, queue, kDouble, 128, samples ) != 0 ) ? -1 : 0;
+}
+
+
--- a/test_conformance/compatibility/test_conformance/basic/test_float2int.c
+++ b/test_conformance/compatibility/test_conformance/basic/test_float2int.c
@@ -0,0 +1,160 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../../test_common/harness/compat.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+
+#include "procs.h"
+
+// OpenCL C source for the test_float2int kernel: each work-item reads
+// src[get_global_id(0)] and stores that value, cast to int, into dst at the
+// same index.
+const char *float2int_kernel_code =
+"__kernel void test_float2int(__global float *src, __global int *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = (int)src[tid];\n"
+"\n"
+"}\n";
+
+
+// Host-side check for the float2int kernel.
+// For each of the n elements the host casts inptr[k] to int and requires the
+// device result outptr[k] to be equal.  Logs a failure and returns -1 at the
+// first mismatch; logs a pass and returns 0 when all n elements agree.
+int
+verify_float2int(cl_float *inptr, cl_int *outptr, int n)
+{
+    int k = 0;
+
+    while (k < n)
+    {
+        const int expected = (int)inptr[k];
+
+        if (expected != outptr[k])
+        {
+            log_error("FLOAT2INT test failed\n");
+            return -1;
+        }
+        ++k;
+    }
+
+    log_info("FLOAT2INT test passed\n");
+    return 0;
+}
+
+
+// Runs the test_float2int kernel over num_elements random floats drawn between
+// -0x1.0p31f and 0x1.0p31f and checks the device output with verify_float2int.
+//
+// device is not referenced; context and queue are used for every CL call.
+// Returns 0 when verification passes and -1 on any allocation, CL or
+// verification failure.  Every exit path goes through the cleanup label, so
+// host buffers and CL objects acquired so far are always released.
+int
+test_float2int(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+{
+    cl_mem          streams[2] = { NULL, NULL };
+    cl_float        *input_ptr = NULL;
+    cl_int          *output_ptr = NULL;
+    cl_program      program = NULL;
+    cl_kernel       kernel = NULL;
+    size_t          lengths[1];
+    size_t          threads[1];
+    int             result = -1;    // stays -1 unless verification is reached
+    int             err;
+    int             i;
+    MTdata          d;
+
+    input_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
+    output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
+    if (!input_ptr || !output_ptr)
+    {
+        log_error("malloc failed\n");
+        goto cleanup;
+    }
+
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL);
+    if (!streams[0])
+    {
+        log_error("clCreateBuffer failed\n");
+        goto cleanup;
+    }
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, NULL);
+    if (!streams[1])
+    {
+        log_error("clCreateBuffer failed\n");
+        goto cleanup;
+    }
+
+    // The generator is only needed to fill the input array; release it right away.
+    d = init_genrand( gRandomSeed );
+    for (i=0; i<num_elements; i++)
+        input_ptr[i] = get_random_float(-MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), d);
+    free_mtdata(d); d = NULL;
+
+    err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, sizeof(cl_float)*num_elements, (void *)input_ptr, 0, NULL, NULL);
+    if (err != CL_SUCCESS)
+    {
+        log_error("clWriteArray failed\n");
+        goto cleanup;
+    }
+
+    lengths[0] = strlen(float2int_kernel_code);
+    program = clCreateProgramWithSource(context, 1, &float2int_kernel_code, lengths, NULL);
+    if (!program)
+    {
+        log_error("clCreateProgramWithSource failed\n");
+        goto cleanup;
+    }
+
+    err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
+    if (err != CL_SUCCESS)
+    {
+        log_error("clBuildProgramExecutable failed\n");
+        goto cleanup;
+    }
+
+    kernel = clCreateKernel(program, "test_float2int", NULL);
+    if (!kernel)
+    {
+        log_error("clCreateKernel failed\n");
+        goto cleanup;
+    }
+
+    // OR the two results together so a failure on argument 0 is not
+    // overwritten by a success on argument 1.
+    err  = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
+    err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
+    if (err != CL_SUCCESS)
+    {
+        log_error("clSetKernelArgs failed\n");
+        goto cleanup;
+    }
+
+    threads[0] = (size_t)num_elements;
+    err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
+    if (err != CL_SUCCESS)
+    {
+        log_error("clEnqueueNDRangeKernel failed\n");
+        goto cleanup;
+    }
+
+    // Blocking read: output_ptr is complete when the call returns.
+    err = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*num_elements, (void *)output_ptr, 0, NULL, NULL );
+    if (err != CL_SUCCESS)
+    {
+        log_error("clEnqueueReadBuffer failed\n");
+        goto cleanup;
+    }
+
+    result = verify_float2int(input_ptr, output_ptr, num_elements);
+
+cleanup:
+    // Only release what was actually created; free(NULL) is a no-op.
+    if (streams[0])
+        clReleaseMemObject(streams[0]);
+    if (streams[1])
+        clReleaseMemObject(streams[1]);
+    if (kernel)
+        clReleaseKernel(kernel);
+    if (program)
+        clReleaseProgram(program);
+    free(input_ptr);
+    free(output_ptr);
+
+    return result;
+}
+
+
+
+
+
--- a/test_conformance/compatibility/test_conformance/basic/test_fpmath_float.c
+++ b/test_conformance/compatibility/test_conformance/basic/test_fpmath_float.c
@@ -0,0 +1,270 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../../test_common/harness/compat.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include "../../test_common/harness/rounding_mode.h"
+
+#include "procs.h"
+
+// OpenCL C source for test_fpadd: dst[tid] = srcA[tid] + srcB[tid] on scalar
+// floats, with tid = get_global_id(0).
+const char *fpadd_kernel_code =
+"__kernel void test_fpadd(__global float *srcA, __global float *srcB, __global float *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = srcA[tid] + srcB[tid];\n"
+"}\n";
+
+// OpenCL C source for test_fpsub: dst[tid] = srcA[tid] - srcB[tid].
+const char *fpsub_kernel_code =
+"__kernel void test_fpsub(__global float *srcA, __global float *srcB, __global float *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = srcA[tid] - srcB[tid];\n"
+"}\n";
+
+// OpenCL C source for test_fpmul: dst[tid] = srcA[tid] * srcB[tid].
+const char *fpmul_kernel_code =
+"__kernel void test_fpmul(__global float *srcA, __global float *srcB, __global float *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = srcA[tid] * srcB[tid];\n"
+"}\n";
+
+
+// NOTE(review): not referenced anywhere in this file; the verify_fp* routines
+// below compare results for exact equality rather than against a tolerance.
+static const float    MAX_ERR = 1e-5f;
+
+// Host reference check for the float add kernel.
+// Recomputes inptrA[k] + inptrB[k] as a float for each of the n elements and
+// requires it to compare equal to outptr[k].  Logs a failure and returns -1 at
+// the first mismatch; logs a pass and returns 0 otherwise.
+int
+verify_fpadd(float *inptrA, float *inptrB, float *outptr, int n)
+{
+    int k;
+
+    for (k = 0; k < n; ++k)
+    {
+        // Stored in a float so the comparison is made at single precision.
+        const float expected = inptrA[k] + inptrB[k];
+
+        if (expected == outptr[k])
+            continue;
+
+        log_error("FP_ADD float test failed\n");
+        return -1;
+    }
+
+    log_info("FP_ADD float test passed\n");
+    return 0;
+}
+
+// Host reference check for the float subtract kernel.
+// Recomputes inptrA[k] - inptrB[k] as a float for each of the n elements and
+// requires it to compare equal to outptr[k].  Logs a failure and returns -1 at
+// the first mismatch; logs a pass and returns 0 otherwise.
+int
+verify_fpsub(float *inptrA, float *inptrB, float *outptr, int n)
+{
+    int k;
+
+    for (k = 0; k < n; ++k)
+    {
+        // Stored in a float so the comparison is made at single precision.
+        const float expected = inptrA[k] - inptrB[k];
+
+        if (expected == outptr[k])
+            continue;
+
+        log_error("FP_SUB float test failed\n");
+        return -1;
+    }
+
+    log_info("FP_SUB float test passed\n");
+    return 0;
+}
+
+// Host reference check for the float multiply kernel.
+// Recomputes inptrA[k] * inptrB[k] as a float for each of the n elements and
+// requires it to compare equal to outptr[k].  Logs a failure and returns -1 at
+// the first mismatch; logs a pass and returns 0 otherwise.
+int
+verify_fpmul(float *inptrA, float *inptrB, float *outptr, int n)
+{
+    int k;
+
+    for (k = 0; k < n; ++k)
+    {
+        // Stored in a float so the comparison is made at single precision.
+        const float expected = inptrA[k] * inptrB[k];
+
+        if (expected == outptr[k])
+            continue;
+
+        log_error("FP_MUL float test failed\n");
+        return -1;
+    }
+
+    log_info("FP_MUL float test passed\n");
+    return 0;
+}
+
+
+// Runs the scalar-float add, subtract and multiply kernels on num_elements
+// random inputs and compares each result with the host computation
+// (verify_fpadd / verify_fpsub / verify_fpmul).
+//
+// If the device reports round-to-zero but not round-to-nearest for single
+// precision, it must have the EMBEDDED_PROFILE profile string; in that case the
+// host rounding mode is switched to round-toward-zero around each verification
+// and restored afterwards.
+//
+// Returns 0 when all three operations verify, -1 (or the failing error value
+// passed through test_error) otherwise.
+//
+// NOTE(review): test_error appears to return from this function on failure;
+// buffers and CL objects acquired before such a failure are not released on
+// those paths.
+int
+test_fpmath_float(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+{
+    cl_mem streams[4];
+    cl_program program[3];
+    cl_kernel kernel[3];
+
+    float *input_ptr[3], *output_ptr, *p;
+    size_t threads[1];
+    int err, i;
+    MTdata d = NULL;
+    size_t length = sizeof(cl_float) * num_elements;
+    int isRTZ = 0;
+    RoundingMode oldMode = kDefaultRoundingMode;
+
+    // check for floating point capabilities
+    cl_device_fp_config single_config = 0;
+    err = clGetDeviceInfo( device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( single_config ), &single_config, NULL );
+    if (err) {
+      log_error("clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed: %d\n", err);
+      test_finish();
+      return -1;
+    }
+    //If we only support rtz mode
+    if( CL_FP_ROUND_TO_ZERO == ( single_config & (CL_FP_ROUND_TO_ZERO|CL_FP_ROUND_TO_NEAREST) ) )
+    {
+        //Check to make sure we are an embedded device
+        char profile[32];
+        err = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL);
+        if( err )
+        {
+            log_error("clGetDeviceInfo for CL_DEVICE_PROFILE failed: %d\n", err);
+            test_finish();
+            return -1;
+        }
+        if( 0 != strcmp( profile, "EMBEDDED_PROFILE"))
+        {
+            log_error( "FAILURE:  Device doesn't support CL_FP_ROUND_TO_NEAREST and isn't EMBEDDED_PROFILE\n" );
+            test_finish();
+            return -1;
+        }
+
+        isRTZ = 1;
+        oldMode = get_round();
+    }
+
+
+    input_ptr[0] = (cl_float*)malloc(length);
+    input_ptr[1] = (cl_float*)malloc(length);
+    input_ptr[2] = (cl_float*)malloc(length);
+    output_ptr   = (cl_float*)malloc(length);
+    if( !input_ptr[0] || !input_ptr[1] || !input_ptr[2] || !output_ptr )
+    {
+        log_error("malloc failed\n");
+        free(input_ptr[0]);
+        free(input_ptr[1]);
+        free(input_ptr[2]);
+        free(output_ptr);
+        return -1;
+    }
+
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
+    test_error( err, "clCreateBuffer failed.");
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
+    test_error( err, "clCreateBuffer failed.");
+    streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
+    test_error( err, "clCreateBuffer failed.");
+    streams[3] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
+    test_error( err, "clCreateBuffer failed.");
+
+    // The generator is created only for the fill loops and released right
+    // after them, so no later exit path can leak it.
+    d = init_genrand( gRandomSeed );
+    p = input_ptr[0];
+    for (i=0; i<num_elements; i++)
+        p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
+    p = input_ptr[1];
+    for (i=0; i<num_elements; i++)
+        p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
+    p = input_ptr[2];
+    for (i=0; i<num_elements; i++)
+        p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
+    free_mtdata( d );
+    d = NULL;
+
+    err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
+    test_error( err, "clEnqueueWriteBuffer failed.");
+
+    err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
+    test_error( err, "clEnqueueWriteBuffer failed.");
+
+    err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr[2], 0, NULL, NULL);
+    test_error( err, "clEnqueueWriteBuffer failed.");
+
+    err = create_single_kernel_helper(context, &program[0], &kernel[0], 1, &fpadd_kernel_code, "test_fpadd");
+    test_error( err, "create_single_kernel_helper failed");
+
+    err = create_single_kernel_helper(context, &program[1], &kernel[1], 1, &fpsub_kernel_code, "test_fpsub");
+    test_error( err, "create_single_kernel_helper failed");
+
+    err = create_single_kernel_helper(context, &program[2], &kernel[2], 1, &fpmul_kernel_code, "test_fpmul");
+    test_error( err, "create_single_kernel_helper failed");
+
+
+    // All three kernels read streams[0] and streams[1] and write streams[3].
+    err  = clSetKernelArg(kernel[0], 0, sizeof streams[0], &streams[0]);
+    err |= clSetKernelArg(kernel[0], 1, sizeof streams[1], &streams[1]);
+    err |= clSetKernelArg(kernel[0], 2, sizeof streams[3], &streams[3]);
+    test_error( err, "clSetKernelArgs failed.");
+
+    err  = clSetKernelArg(kernel[1], 0, sizeof streams[0], &streams[0]);
+    err |= clSetKernelArg(kernel[1], 1, sizeof streams[1], &streams[1]);
+    err |= clSetKernelArg(kernel[1], 2, sizeof streams[3], &streams[3]);
+    test_error( err, "clSetKernelArgs failed.");
+
+    err  = clSetKernelArg(kernel[2], 0, sizeof streams[0], &streams[0]);
+    err |= clSetKernelArg(kernel[2], 1, sizeof streams[1], &streams[1]);
+    err |= clSetKernelArg(kernel[2], 2, sizeof streams[3], &streams[3]);
+    test_error( err, "clSetKernelArgs failed.");
+
+    threads[0] = (size_t)num_elements;
+    for (i=0; i<3; i++)
+    {
+        err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
+        test_error( err, "clEnqueueNDRangeKernel failed.");
+
+        err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
+        test_error( err, "clEnqueueReadBuffer failed.");
+
+        // Match the host rounding mode to the device for the reference math.
+        if( isRTZ )
+            set_round( kRoundTowardZero, kfloat );
+
+        switch (i)
+        {
+            case 0:
+                err = verify_fpadd(input_ptr[0], input_ptr[1], output_ptr, num_elements);
+                break;
+            case 1:
+                err = verify_fpsub(input_ptr[0], input_ptr[1], output_ptr, num_elements);
+                break;
+            case 2:
+                err = verify_fpmul(input_ptr[0], input_ptr[1], output_ptr, num_elements);
+                break;
+        }
+
+        if( isRTZ )
+            set_round( oldMode, kfloat );
+
+        if (err)
+            break;
+    }
+
+    // cleanup
+    clReleaseMemObject(streams[0]);
+    clReleaseMemObject(streams[1]);
+    clReleaseMemObject(streams[2]);
+    clReleaseMemObject(streams[3]);
+    for (i=0; i<3; i++)
+    {
+        clReleaseKernel(kernel[i]);
+        clReleaseProgram(program[i]);
+    }
+    free(input_ptr[0]);
+    free(input_ptr[1]);
+    free(input_ptr[2]);
+    free(output_ptr);
+
+    return err;
+}
+
+
--- a/test_conformance/compatibility/test_conformance/basic/test_fpmath_float2.c
+++ b/test_conformance/compatibility/test_conformance/basic/test_fpmath_float2.c
@@ -0,0 +1,268 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../../test_common/harness/compat.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include "../../test_common/harness/rounding_mode.h"
+
+
+#include "procs.h"
+
+// OpenCL C source for test_fpadd2: dst[tid] = srcA[tid] + srcB[tid] on float2
+// elements, with tid = get_global_id(0).
+const char *fpadd2_kernel_code =
+"__kernel void test_fpadd2(__global float2 *srcA, __global float2 *srcB, __global float2 *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = srcA[tid] + srcB[tid];\n"
+"}\n";
+
+// OpenCL C source for test_fpsub2: dst[tid] = srcA[tid] - srcB[tid] on float2.
+const char *fpsub2_kernel_code =
+"__kernel void test_fpsub2(__global float2 *srcA, __global float2 *srcB, __global float2 *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = srcA[tid] - srcB[tid];\n"
+"}\n";
+
+// OpenCL C source for test_fpmul2: dst[tid] = srcA[tid] * srcB[tid] on float2.
+const char *fpmul2_kernel_code =
+"__kernel void test_fpmul2(__global float2 *srcA, __global float2 *srcB, __global float2 *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = srcA[tid] * srcB[tid];\n"
+"}\n";
+
+
+// Host reference check for the float2 add kernel, treating the data as a flat
+// array of n floats.  Each inptrA[k] + inptrB[k], stored as a float, must
+// compare equal to outptr[k].  Logs a failure and returns -1 at the first
+// mismatch; logs a pass and returns 0 otherwise.
+int
+verify_fpadd2(float *inptrA, float *inptrB, float *outptr, int n)
+{
+    int k = 0;
+
+    while (k < n)
+    {
+        const float expected = inptrA[k] + inptrB[k];
+
+        if (expected != outptr[k])
+        {
+            log_error("FP_ADD float2 test failed\n");
+            return -1;
+        }
+        ++k;
+    }
+
+    log_info("FP_ADD float2 test passed\n");
+    return 0;
+}
+
+// Host reference check for the float2 subtract kernel, treating the data as a
+// flat array of n floats.  Each inptrA[k] - inptrB[k], stored as a float, must
+// compare equal to outptr[k].  Logs a failure and returns -1 at the first
+// mismatch; logs a pass and returns 0 otherwise.
+int
+verify_fpsub2(float *inptrA, float *inptrB, float *outptr, int n)
+{
+    int k = 0;
+
+    while (k < n)
+    {
+        const float expected = inptrA[k] - inptrB[k];
+
+        if (expected != outptr[k])
+        {
+            log_error("FP_SUB float2 test failed\n");
+            return -1;
+        }
+        ++k;
+    }
+
+    log_info("FP_SUB float2 test passed\n");
+    return 0;
+}
+
+// Host reference check for the float2 multiply kernel, treating the data as a
+// flat array of n floats.  Each inptrA[k] * inptrB[k], stored as a float, must
+// compare equal to outptr[k].  Logs a failure and returns -1 at the first
+// mismatch; logs a pass and returns 0 otherwise.
+int
+verify_fpmul2(float *inptrA, float *inptrB, float *outptr, int n)
+{
+    int k = 0;
+
+    while (k < n)
+    {
+        const float expected = inptrA[k] * inptrB[k];
+
+        if (expected != outptr[k])
+        {
+            log_error("FP_MUL float2 test failed\n");
+            return -1;
+        }
+        ++k;
+    }
+
+    log_info("FP_MUL float2 test passed\n");
+    return 0;
+}
+
+
+// Runs the float2 add, subtract and multiply kernels on num_elements random
+// float2 inputs (2 * num_elements floats per buffer) and compares each result
+// with the host computation (verify_fpadd2 / verify_fpsub2 / verify_fpmul2).
+//
+// If the device reports round-to-zero but not round-to-nearest for single
+// precision, it must have the EMBEDDED_PROFILE profile string; in that case the
+// host rounding mode is switched to round-toward-zero around each verification
+// and restored afterwards.
+//
+// Returns 0 when all three operations verify, -1 (or the failing error value
+// passed through test_error) otherwise.
+//
+// NOTE(review): test_error appears to return from this function on failure;
+// buffers and CL objects acquired before such a failure are not released on
+// those paths.
+int
+test_fpmath_float2(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+{
+    cl_mem streams[4];
+    cl_program program[3];
+    cl_kernel kernel[3];
+
+    cl_float *input_ptr[3], *output_ptr, *p;
+    size_t threads[1];
+    int err, i;
+    MTdata d = NULL;
+
+    size_t length = sizeof(cl_float) * 2 * num_elements;
+    int isRTZ = 0;
+    RoundingMode oldMode = kDefaultRoundingMode;
+
+    // check for floating point capabilities
+    cl_device_fp_config single_config = 0;
+    err = clGetDeviceInfo( device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( single_config ), &single_config, NULL );
+    if (err) {
+      log_error("clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed: %d\n", err);
+      test_finish();
+      return -1;
+    }
+    //If we only support rtz mode
+    if( CL_FP_ROUND_TO_ZERO == ( single_config & (CL_FP_ROUND_TO_ZERO|CL_FP_ROUND_TO_NEAREST) ) )
+    {
+        //Check to make sure we are an embedded device
+        char profile[32];
+        err = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL);
+        if( err )
+        {
+            log_error("clGetDeviceInfo for CL_DEVICE_PROFILE failed: %d\n", err);
+            test_finish();
+            return -1;
+        }
+        if( 0 != strcmp( profile, "EMBEDDED_PROFILE"))
+        {
+            log_error( "FAILURE:  Device doesn't support CL_FP_ROUND_TO_NEAREST and isn't EMBEDDED_PROFILE\n" );
+            test_finish();
+            return -1;
+        }
+
+        isRTZ = 1;
+        oldMode = get_round();
+    }
+
+    input_ptr[0] = (cl_float*)malloc(length);
+    input_ptr[1] = (cl_float*)malloc(length);
+    input_ptr[2] = (cl_float*)malloc(length);
+    output_ptr   = (cl_float*)malloc(length);
+    if( !input_ptr[0] || !input_ptr[1] || !input_ptr[2] || !output_ptr )
+    {
+        log_error("malloc failed\n");
+        free(input_ptr[0]);
+        free(input_ptr[1]);
+        free(input_ptr[2]);
+        free(output_ptr);
+        return -1;
+    }
+
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
+    test_error( err, "clCreateBuffer failed.");
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
+    test_error( err, "clCreateBuffer failed.");
+    streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
+    test_error( err, "clCreateBuffer failed.");
+    streams[3] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
+    test_error( err, "clCreateBuffer failed.");
+
+    // The generator is created only for the fill loops and released right
+    // after them, so no later exit path can leak it.
+    d = init_genrand( gRandomSeed );
+    p = input_ptr[0];
+    for (i=0; i<num_elements*2; i++)
+        p[i] = get_random_float(-MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), d);
+    p = input_ptr[1];
+    for (i=0; i<num_elements*2; i++)
+        p[i] = get_random_float(-MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), d);
+    p = input_ptr[2];
+    for (i=0; i<num_elements*2; i++)
+        p[i] = get_random_float(-MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), d);
+    free_mtdata(d);
+    d = NULL;
+
+    err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
+    test_error(err, "clEnqueueWriteBuffer failed");
+    err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
+    test_error(err, "clEnqueueWriteBuffer failed");
+    err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr[2], 0, NULL, NULL);
+    test_error(err, "clEnqueueWriteBuffer failed");
+
+    err = create_single_kernel_helper(context, &program[0], &kernel[0], 1, &fpadd2_kernel_code, "test_fpadd2");
+    test_error( err, "create_single_kernel_helper failed");
+
+    err = create_single_kernel_helper(context, &program[1], &kernel[1], 1, &fpsub2_kernel_code, "test_fpsub2");
+    test_error( err, "create_single_kernel_helper failed");
+
+    err = create_single_kernel_helper(context, &program[2], &kernel[2], 1, &fpmul2_kernel_code, "test_fpmul2");
+    test_error( err, "create_single_kernel_helper failed");
+
+
+    // All three kernels read streams[0] and streams[1] and write streams[3].
+    err  = clSetKernelArg(kernel[0], 0, sizeof streams[0], &streams[0]);
+    err |= clSetKernelArg(kernel[0], 1, sizeof streams[1], &streams[1]);
+    err |= clSetKernelArg(kernel[0], 2, sizeof streams[3], &streams[3]);
+    test_error( err, "clSetKernelArgs failed.");
+
+    err  = clSetKernelArg(kernel[1], 0, sizeof streams[0], &streams[0]);
+    err |= clSetKernelArg(kernel[1], 1, sizeof streams[1], &streams[1]);
+    err |= clSetKernelArg(kernel[1], 2, sizeof streams[3], &streams[3]);
+    test_error( err, "clSetKernelArgs failed.");
+
+    err  = clSetKernelArg(kernel[2], 0, sizeof streams[0], &streams[0]);
+    err |= clSetKernelArg(kernel[2], 1, sizeof streams[1], &streams[1]);
+    err |= clSetKernelArg(kernel[2], 2, sizeof streams[3], &streams[3]);
+    test_error( err, "clSetKernelArgs failed.");
+
+    // One work-item per float2 element.
+    threads[0] = (size_t)num_elements;
+    for (i=0; i<3; i++)
+    {
+        err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
+        test_error( err, "clEnqueueNDRangeKernel failed.");
+
+        err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
+        test_error( err, "clEnqueueReadBuffer failed.");
+
+        // Match the host rounding mode to the device for the reference math.
+        if( isRTZ )
+            set_round( kRoundTowardZero, kfloat );
+
+        // The verifiers walk the buffers as flat float arrays, hence the *2.
+        switch (i)
+        {
+            case 0:
+                err = verify_fpadd2(input_ptr[0], input_ptr[1], output_ptr, num_elements*2);
+                break;
+            case 1:
+                err = verify_fpsub2(input_ptr[0], input_ptr[1], output_ptr, num_elements*2);
+                break;
+            case 2:
+                err = verify_fpmul2(input_ptr[0], input_ptr[1], output_ptr, num_elements*2);
+                break;
+        }
+
+        if( isRTZ )
+            set_round( oldMode, kfloat );
+
+        if (err)
+            break;
+    }
+
+
+    // cleanup
+    clReleaseMemObject(streams[0]);
+    clReleaseMemObject(streams[1]);
+    clReleaseMemObject(streams[2]);
+    clReleaseMemObject(streams[3]);
+    for (i=0; i<3; i++)
+    {
+        clReleaseKernel(kernel[i]);
+        clReleaseProgram(program[i]);
+    }
+    free(input_ptr[0]);
+    free(input_ptr[1]);
+    free(input_ptr[2]);
+    free(output_ptr);
+    return err;
+}
+
+
--- a/test_conformance/compatibility/test_conformance/basic/test_fpmath_float4.c
+++ b/test_conformance/compatibility/test_conformance/basic/test_fpmath_float4.c
@@ -0,0 +1,269 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../../test_common/harness/compat.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+
+#include "procs.h"
+#include "../../test_common/harness/rounding_mode.h"
+
+// OpenCL C source for test_fpadd4: dst[tid] = srcA[tid] + srcB[tid] on float4
+// elements, with tid = get_global_id(0).
+const char *fpadd4_kernel_code =
+"__kernel void test_fpadd4(__global float4 *srcA, __global float4 *srcB, __global float4 *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = srcA[tid] + srcB[tid];\n"
+"}\n";
+
+// OpenCL C source for test_fpsub4: dst[tid] = srcA[tid] - srcB[tid] on float4.
+const char *fpsub4_kernel_code =
+"__kernel void test_fpsub4(__global float4 *srcA, __global float4 *srcB, __global float4 *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = srcA[tid] - srcB[tid];\n"
+"}\n";
+
+// OpenCL C source for test_fpmul4: dst[tid] = srcA[tid] * srcB[tid] on float4.
+const char *fpmul4_kernel_code =
+"__kernel void test_fpmul4(__global float4 *srcA, __global float4 *srcB, __global float4 *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = srcA[tid] * srcB[tid];\n"
+"}\n";
+
+
+// Host reference check for the float4 add kernel, treating the data as a flat
+// array of n floats.  Each inptrA[k] + inptrB[k], stored as a float, must
+// compare equal to outptr[k].  Logs a failure and returns -1 at the first
+// mismatch; logs a pass and returns 0 otherwise.
+int
+verify_fpadd4(float *inptrA, float *inptrB, float *outptr, int n)
+{
+    int   k;
+    float expected;
+
+    for (k = 0; k < n; k++)
+    {
+        expected = inptrA[k] + inptrB[k];
+        if (expected == outptr[k])
+            continue;
+
+        log_error("FP_ADD float4 test failed\n");
+        return -1;
+    }
+
+    log_info("FP_ADD float4 test passed\n");
+    return 0;
+}
+
+// Host reference check for the float4 subtract kernel, treating the data as a
+// flat array of n floats.  Each inptrA[k] - inptrB[k], stored as a float, must
+// compare equal to outptr[k].  Logs a failure and returns -1 at the first
+// mismatch; logs a pass and returns 0 otherwise.
+int
+verify_fpsub4(float *inptrA, float *inptrB, float *outptr, int n)
+{
+    int   k;
+    float expected;
+
+    for (k = 0; k < n; k++)
+    {
+        expected = inptrA[k] - inptrB[k];
+        if (expected == outptr[k])
+            continue;
+
+        log_error("FP_SUB float4 test failed\n");
+        return -1;
+    }
+
+    log_info("FP_SUB float4 test passed\n");
+    return 0;
+}
+
+// Host reference check for the float4 multiply kernel, treating the data as a
+// flat array of n floats.  Each inptrA[k] * inptrB[k], stored as a float, must
+// compare equal to outptr[k].  Logs a failure and returns -1 at the first
+// mismatch; logs a pass and returns 0 otherwise.
+int
+verify_fpmul4(float *inptrA, float *inptrB, float *outptr, int n)
+{
+    int   k;
+    float expected;
+
+    for (k = 0; k < n; k++)
+    {
+        expected = inptrA[k] * inptrB[k];
+        if (expected == outptr[k])
+            continue;
+
+        log_error("FP_MUL float4 test failed\n");
+        return -1;
+    }
+
+    log_info("FP_MUL float4 test passed\n");
+    return 0;
+}
+
+
+// Runs the float4 add, subtract and multiply kernels on num_elements random
+// float4 inputs (4 * num_elements floats per buffer) and compares each result
+// with the host computation (verify_fpadd4 / verify_fpsub4 / verify_fpmul4).
+//
+// If the device reports round-to-zero but not round-to-nearest for single
+// precision, it must have the EMBEDDED_PROFILE profile string; in that case the
+// host rounding mode is switched to round-toward-zero around each verification
+// and restored afterwards.
+//
+// Returns 0 when all three operations verify, -1 (or the failing error value
+// passed through test_error) otherwise.
+//
+// NOTE(review): test_error appears to return from this function on failure;
+// buffers and CL objects acquired before such a failure are not released on
+// those paths.
+int
+test_fpmath_float4(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+{
+    cl_mem streams[4];
+    cl_program program[3];
+    cl_kernel kernel[3];
+
+    cl_float *input_ptr[3], *output_ptr, *p;
+    size_t threads[1];
+    int err, i;
+    MTdata d = NULL;
+
+    size_t length = sizeof(cl_float) * 4 * num_elements;
+    int isRTZ = 0;
+    RoundingMode oldMode = kDefaultRoundingMode;
+
+    // check for floating point capabilities
+    cl_device_fp_config single_config = 0;
+    err = clGetDeviceInfo( device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( single_config ), &single_config, NULL );
+    if (err) {
+      log_error("clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed: %d\n", err);
+      test_finish();
+      return -1;
+    }
+    //If we only support rtz mode
+    if( CL_FP_ROUND_TO_ZERO == ( single_config & (CL_FP_ROUND_TO_ZERO|CL_FP_ROUND_TO_NEAREST) ) )
+    {
+        //Check to make sure we are an embedded device
+        char profile[32];
+        err = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL);
+        if( err )
+        {
+            log_error("clGetDeviceInfo for CL_DEVICE_PROFILE failed: %d\n", err);
+            test_finish();
+            return -1;
+        }
+        if( 0 != strcmp( profile, "EMBEDDED_PROFILE"))
+        {
+            log_error( "FAILURE:  Device doesn't support CL_FP_ROUND_TO_NEAREST and isn't EMBEDDED_PROFILE\n" );
+            test_finish();
+            return -1;
+        }
+
+        isRTZ = 1;
+        oldMode = get_round();
+    }
+
+    input_ptr[0] = (cl_float*)malloc(length);
+    input_ptr[1] = (cl_float*)malloc(length);
+    input_ptr[2] = (cl_float*)malloc(length);
+    output_ptr   = (cl_float*)malloc(length);
+    if( !input_ptr[0] || !input_ptr[1] || !input_ptr[2] || !output_ptr )
+    {
+        log_error("malloc failed\n");
+        free(input_ptr[0]);
+        free(input_ptr[1]);
+        free(input_ptr[2]);
+        free(output_ptr);
+        return -1;
+    }
+
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
+    test_error( err, "clCreateBuffer failed.");
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
+    test_error( err, "clCreateBuffer failed.");
+    streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
+    test_error( err, "clCreateBuffer failed.");
+    streams[3] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
+    test_error( err, "clCreateBuffer failed.");
+
+    // The generator is created only for the fill loops and released right
+    // after them, so no exit path can leak it.
+    d = init_genrand( gRandomSeed );
+    p = input_ptr[0];
+    for (i=0; i<num_elements*4; i++)
+        p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
+    p = input_ptr[1];
+    for (i=0; i<num_elements*4; i++)
+        p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
+    p = input_ptr[2];
+    for (i=0; i<num_elements*4; i++)
+        p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
+    free_mtdata(d);
+    d = NULL;
+
+    err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
+    test_error(err, "clEnqueueWriteBuffer failed");
+    err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
+    test_error(err, "clEnqueueWriteBuffer failed");
+    err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr[2], 0, NULL, NULL);
+    test_error(err, "clEnqueueWriteBuffer failed");
+
+    err = create_single_kernel_helper(context, &program[0], &kernel[0], 1, &fpadd4_kernel_code, "test_fpadd4");
+    test_error( err, "create_single_kernel_helper failed");
+
+    err = create_single_kernel_helper(context, &program[1], &kernel[1], 1, &fpsub4_kernel_code, "test_fpsub4");
+    test_error( err, "create_single_kernel_helper failed");
+
+    err = create_single_kernel_helper(context, &program[2], &kernel[2], 1, &fpmul4_kernel_code, "test_fpmul4");
+    test_error( err, "create_single_kernel_helper failed");
+
+
+    // All three kernels read streams[0] and streams[1] and write streams[3].
+    err  = clSetKernelArg(kernel[0], 0, sizeof streams[0], &streams[0]);
+    err |= clSetKernelArg(kernel[0], 1, sizeof streams[1], &streams[1]);
+    err |= clSetKernelArg(kernel[0], 2, sizeof streams[3], &streams[3]);
+    test_error( err, "clSetKernelArgs failed.");
+
+    err  = clSetKernelArg(kernel[1], 0, sizeof streams[0], &streams[0]);
+    err |= clSetKernelArg(kernel[1], 1, sizeof streams[1], &streams[1]);
+    err |= clSetKernelArg(kernel[1], 2, sizeof streams[3], &streams[3]);
+    test_error( err, "clSetKernelArgs failed.");
+
+    err  = clSetKernelArg(kernel[2], 0, sizeof streams[0], &streams[0]);
+    err |= clSetKernelArg(kernel[2], 1, sizeof streams[1], &streams[1]);
+    err |= clSetKernelArg(kernel[2], 2, sizeof streams[3], &streams[3]);
+    test_error( err, "clSetKernelArgs failed.");
+
+
+    // One work-item per float4 element.
+    threads[0] = (size_t)num_elements;
+    for (i=0; i<3; i++)
+    {
+        err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
+        test_error( err, "clEnqueueNDRangeKernel failed.");
+
+        err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
+        test_error( err, "clEnqueueReadBuffer failed.");
+
+        // Match the host rounding mode to the device for the reference math.
+        if( isRTZ )
+            set_round( kRoundTowardZero, kfloat );
+
+        // The verifiers walk the buffers as flat float arrays, hence the *4.
+        switch (i)
+        {
+            case 0:
+                err = verify_fpadd4(input_ptr[0], input_ptr[1], output_ptr, num_elements*4);
+                break;
+            case 1:
+                err = verify_fpsub4(input_ptr[0], input_ptr[1], output_ptr, num_elements*4);
+                break;
+            case 2:
+                err = verify_fpmul4(input_ptr[0], input_ptr[1], output_ptr, num_elements*4);
+                break;
+        }
+
+        if( isRTZ )
+            set_round( oldMode, kfloat );
+
+        if (err)
+            break;
+    }
+
+
+    // cleanup
+    clReleaseMemObject(streams[0]);
+    clReleaseMemObject(streams[1]);
+    clReleaseMemObject(streams[2]);
+    clReleaseMemObject(streams[3]);
+    for (i=0; i<3; i++)
+    {
+        clReleaseKernel(kernel[i]);
+        clReleaseProgram(program[i]);
+    }
+    free(input_ptr[0]);
+    free(input_ptr[1]);
+    free(input_ptr[2]);
+    free(output_ptr);
+    return err;
+}
+
+
--- a/test_conformance/compatibility/test_conformance/basic/test_global_work_offsets.cpp
+++ b/test_conformance/compatibility/test_conformance/basic/test_global_work_offsets.cpp
@@ -0,0 +1,284 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "procs.h"
+#include <ctype.h>
+
+
+// OpenCL C source of the kernel used by test_global_work_offsets. Each
+// work-item rebuilds an offset-free linear index from its local ID, group ID
+// and local size, and stores its get_global_id() value for dimensions 0, 1 and
+// 2 at that index in outputID_A, outputID_B and outputID_C respectively.
+const char *work_offset_test[] = {
+    "__kernel void test( __global int * outputID_A, \n"
+    "                        __global int * outputID_B, __global int * outputID_C )\n"
+    "{\n"
+    "    size_t id0 = get_local_id( 0 ) + get_group_id( 0 ) * get_local_size( 0 );\n"
+    "    size_t id1 = get_local_id( 1 ) + get_group_id( 1 ) * get_local_size( 1 );\n"
+    "    size_t id2 = get_local_id( 2 ) + get_group_id( 2 ) * get_local_size( 2 );\n"
+    "    size_t id = ( id2 * get_global_size( 0 ) * get_global_size( 1 ) ) + ( id1 * get_global_size( 0 ) ) + id0;\n"
+    "\n"
+    "    outputID_A[ id ] = get_global_id( 0 );\n"
+    "    outputID_B[ id ] = get_global_id( 1 );\n"
+    "    outputID_C[ id ] = get_global_id( 2 );\n"
+    "}\n"
+    };
+
+// Upper bound on the total number of work-items in a single test launch.
+// Parenthesized so the expansion stays one operand in any expression.
+#define MAX_TEST_ITEMS ( 16 * 16 * 16 )
+// Number of randomized launches each test performs.
+#define NUM_TESTS 16
+// Largest global work offset chosen for any dimension.
+#define MAX_OFFSET 256
+
+// Makes the enclosing function return -1 when v lies outside [0, m).
+// Relies on a loop index named `i` in the enclosing scope; c is the suffix
+// letter of the output buffer being checked. Call sites do not add a trailing
+// semicolon, so the expansion deliberately stays a plain if-block.
+#define CHECK_RANGE( v, m, c ) \
+    if( ( (v) >= (cl_int)(m) ) || ( (v) < 0 ) ) \
+    {    \
+        log_error( "ERROR: outputID_%c[%lu]: %d is < 0 or >= %lu\n", (c), (unsigned long)(i), (v), (unsigned long)(m) ); \
+        return -1;    \
+    }
+
+// Verifies the global IDs recorded by one launch of the work-offset kernel.
+//
+//   threads, offsets - 3-element global work size and global work offset used
+//                      for the launch.
+//   outputA/B/C      - get_global_id( 0 ), ( 1 ) and ( 2 ) as stored by each
+//                      work-item; threads[0]*threads[1]*threads[2] entries each.
+//
+// Every ID triple inside [offsets, offsets + threads) must have been stored
+// exactly once and no triple outside that box may appear. Returns 0 on
+// success, -1 if a value is out of range (or the launch is larger than the
+// checker supports), and a non-zero error mask otherwise.
+int check_results( size_t threads[], size_t offsets[], cl_int outputA[], cl_int outputB[], cl_int outputC[] )
+{
+    size_t offsettedSizes[ 3 ] = { threads[ 0 ] + offsets[ 0 ], threads[ 1 ] + offsets[ 1 ], threads[ 2 ] + offsets[ 2 ] };
+    size_t limit = threads[ 0 ] * threads[ 1 ] * threads[ 2 ];
+
+    // The hit map is indexed relative to the offsets, so it needs just one cell
+    // per work-item. A map indexed by absolute ID with fixed extents is overrun
+    // as soon as offset + size in one dimension exceeds that extent, which
+    // aliases neighbouring cells and produces bogus errors.
+    static unsigned char counts[ MAX_TEST_ITEMS ];
+    if( limit > sizeof( counts ) )
+    {
+        log_error( "ERROR: %lu work-items exceed the %lu supported by the result checker\n",
+                   (unsigned long)limit, (unsigned long)sizeof( counts ) );
+        return -1;
+    }
+    memset( counts, 0, sizeof( counts ) );
+
+    const char * limitMsg = " (further errors of this type suppressed)";
+    int missed = 0, multiple = 0, errored = 0;
+
+    for( size_t i = 0; i < limit; i++ )
+    {
+        // Check ranges first
+        CHECK_RANGE( outputA[ i ], offsettedSizes[ 0 ], 'A' )
+        CHECK_RANGE( outputB[ i ], offsettedSizes[ 1 ], 'B' )
+        CHECK_RANGE( outputC[ i ], offsettedSizes[ 2 ], 'C' )
+
+        size_t x = (size_t)outputA[ i ], y = (size_t)outputB[ i ], z = (size_t)outputC[ i ];
+
+        // An ID below the offset in any dimension can never be legitimate
+        if( ( x < offsets[ 0 ] ) || ( y < offsets[ 1 ] ) || ( z < offsets[ 2 ] ) )
+        {
+            if( errored < 3 )
+                log_error( "ERROR: Map value (%ld,%ld,%ld) was erroneously returned%s\n", (long)x, (long)y, (long)z, ( errored == 2 ) ? limitMsg : "" );
+            errored++;
+            continue;
+        }
+
+        // Now set the value in the map; saturate at 2 ("more than once") so a
+        // heavily duplicated ID cannot wrap the counter back to zero
+        size_t cell = ( ( z - offsets[ 2 ] ) * threads[ 1 ] + ( y - offsets[ 1 ] ) ) * threads[ 0 ] + ( x - offsets[ 0 ] );
+        if( counts[ cell ] < 2 )
+            counts[ cell ]++;
+    }
+
+    // Now check the map: every cell must have been hit exactly once
+    for( size_t x = 0; x < threads[ 0 ]; x++ )
+    {
+        for( size_t y = 0; y < threads[ 1 ]; y++ )
+        {
+            for( size_t z = 0; z < threads[ 2 ]; z++ )
+            {
+                unsigned char hits = counts[ ( z * threads[ 1 ] + y ) * threads[ 0 ] + x ];
+                // Report absolute IDs, i.e. what get_global_id() should have returned
+                long gx = (long)( x + offsets[ 0 ] ), gy = (long)( y + offsets[ 1 ] ), gz = (long)( z + offsets[ 2 ] );
+
+                if( hits < 1 )
+                {
+                    if( missed < 3 )
+                        log_error( "ERROR: Map value (%ld,%ld,%ld) was missed%s\n", gx, gy, gz, ( missed == 2 ) ? limitMsg : "" );
+                    missed++;
+                }
+                else if( hits > 1 )
+                {
+                    if( multiple < 3 )
+                        log_error( "ERROR: Map value (%ld,%ld,%ld) was returned multiple times%s\n", gx, gy, gz, ( multiple == 2 ) ? limitMsg : "" );
+                    multiple++;
+                }
+            }
+        }
+    }
+
+    if( missed || multiple || errored )
+    {
+        // If get_global_id() ignored the offsets, the IDs would cover
+        // [0, threads) instead. diffs[] is the per-dimension overlap of that box
+        // with the expected one, so every work-item outside the overlap shows up
+        // once as missed and once as erroneously returned.
+        size_t diffs[3] = { ( offsets[ 0 ] > threads[ 0 ] ? 0 : threads[ 0 ] - offsets[ 0 ] ),
+                        ( offsets[ 1 ] > threads[ 1 ] ? 0 : threads[ 1 ] - offsets[ 1 ] ),
+                        ( offsets[ 2 ] > threads[ 2 ] ? 0 : threads[ 2 ] - offsets[ 2 ] ) };
+        int diff = (int)( limit - diffs[ 0 ] * diffs[ 1 ] * diffs[ 2 ] );
+
+        if( ( multiple == 0 ) && ( missed == diff ) && ( errored == diff ) )
+            log_error( "ERROR: Global work offset values are not being respected by get_global_id()\n" );
+        else
+            log_error( "ERROR: Global work offset values did not function as expected (%d missed, %d reported multiple times, %d erroneously hit)\n",
+                            missed, multiple, errored );
+    }
+    return ( missed | multiple | errored );
+}
+
+// Performs NUM_TESTS randomized 3D launches of the work-offset kernel and
+// checks the recorded global IDs after each one. Split out of the entry point
+// so that every early return still passes through the caller's free_mtdata().
+// Returns 0 when all launches check out, non-zero on the first failure.
+static int run_global_work_offset_launches( cl_context context, cl_command_queue queue, clKernelWrapper &kernel,
+                                            clMemWrapper streams[], MTdata seed )
+{
+    int error;
+    size_t    threads[] = {1,1,1}, localThreads[] = {1,1,1}, offsets[] = {0,0,0};
+    cl_int outputA[ MAX_TEST_ITEMS ], outputB[ MAX_TEST_ITEMS ], outputC[ MAX_TEST_ITEMS ];
+
+    for( int test = 0; test < NUM_TESTS; test++ )
+    {
+        // Choose a random combination of thread size, but in total less than MAX_TEST_ITEMS
+        threads[ 0 ] = random_in_range( 1, 32, seed );
+        threads[ 1 ] = random_in_range( 1, 16, seed );
+        threads[ 2 ] = random_in_range( 1, MAX_TEST_ITEMS / (int)( threads[ 0 ] * threads[ 1 ] ), seed );
+
+        // Make sure we get the local thread count right
+        error = get_max_common_3D_work_group_size( context, kernel, threads, localThreads );
+        test_error( error, "Unable to determine local work group sizes" );
+
+        // Randomize some offsets
+        for( int j = 0; j < 3; j++ )
+            offsets[ j ] = random_in_range( 0, MAX_OFFSET, seed );
+
+        log_info( "\tTesting %ld,%ld,%ld (%ld,%ld,%ld) with offsets (%ld,%ld,%ld)...\n",
+                 threads[ 0 ], threads[ 1 ], threads[ 2 ], localThreads[ 0 ], localThreads[ 1 ], localThreads[ 2 ],
+                 offsets[ 0 ], offsets[ 1 ], offsets[ 2 ] );
+
+        // Now set up and run
+        for( int i = 0; i < 3; i++ )
+        {
+            error = clSetKernelArg( kernel, i, sizeof( streams[i] ), &streams[i] );
+            test_error( error, "Unable to set indexed kernel arguments" );
+        }
+
+        error = clEnqueueNDRangeKernel( queue, kernel, 3, offsets, threads, localThreads, 0, NULL, NULL );
+        test_error( error, "Kernel execution failed" );
+
+        // Read our results back now (blocking reads, so the data is valid on return)
+        cl_int * resultBuffers[] = { outputA, outputB, outputC };
+        for( int i = 0; i < 3; i++ )
+        {
+            error = clEnqueueReadBuffer( queue, streams[ i ], CL_TRUE, 0, sizeof( outputA ), resultBuffers[ i ], 0, NULL, NULL );
+            test_error( error, "Unable to get result data" );
+        }
+
+        // Now we need to check the results. The outputs should have one entry for each possible ID,
+        // but they won't be in order, so we need to construct a count map to determine what we got
+        if( check_results( threads, offsets, outputA, outputB, outputC ) )
+        {
+            log_error( "\t(Test failed for global dim %ld,%ld,%ld, local dim %ld,%ld,%ld, offsets %ld,%ld,%ld)\n",
+                      threads[ 0 ], threads[ 1 ], threads[ 2 ], localThreads[ 0 ], localThreads[ 1 ], localThreads[ 2 ],
+                      offsets[ 0 ], offsets[ 1 ], offsets[ 2 ] );
+            return -1;
+        }
+    }
+
+    // All done!
+    return 0;
+}
+
+// Conformance test: get_global_id() must honour the global work offset passed
+// to clEnqueueNDRangeKernel. Builds the work_offset_test kernel, creates three
+// MAX_TEST_ITEMS-sized output buffers and runs the randomized launches.
+// deviceID and num_elements are not used. Returns 0 on success, non-zero on
+// any failure.
+int test_global_work_offsets(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    clProgramWrapper program;
+    clKernelWrapper kernel;
+    clMemWrapper streams[ 3 ];
+
+    int error;
+    cl_int initData[ MAX_TEST_ITEMS ];
+
+
+    // Create the kernel
+    if( create_single_kernel_helper( context, &program, &kernel, 1, work_offset_test, "test" ) != 0 )
+    {
+        return -1;
+    }
+
+    //// Create some output streams
+
+    // Use just one host array to init them all; 0xff bytes make every entry -1,
+    // which the range check rejects if a work-item never writes its slot
+    memset( initData, 0xff, sizeof( initData ) );
+    for( int i = 0; i < 3; i++ )
+    {
+        streams[ i ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR), sizeof(initData), initData, &error );
+        test_error( error, "Unable to create output array" );
+    }
+
+    // Run a few different times; the generator is released on every path
+    MTdata seed = init_genrand( gRandomSeed );
+    error = run_global_work_offset_launches( context, queue, kernel, streams, seed );
+    free_mtdata(seed);
+
+    return error;
+}
+
+// OpenCL C source of the kernel used by test_get_global_offset. Every
+// work-item stores get_global_offset() for dimensions 0, 1 and 2 into the
+// first three elements of outOffsets.
+const char *get_offset_test[] = {
+    "__kernel void test( __global int * outOffsets )\n"
+    "{\n"
+    "    // We use local ID here so we don't have to worry about offsets\n"
+    "   // Also note that these should be the same for ALL threads, so we won't worry about contention\n"
+    "    outOffsets[ 0 ] = (int)get_global_offset( 0 );\n"
+    "    outOffsets[ 1 ] = (int)get_global_offset( 1 );\n"
+    "    outOffsets[ 2 ] = (int)get_global_offset( 2 );\n"
+    "}\n"
+};
+
+// Performs NUM_TESTS randomized 3D launches of the get_offset_test kernel and
+// compares the offsets it reports with the ones passed to the enqueue call.
+// Split out of the entry point so that every early return still passes through
+// the caller's free_mtdata(). Returns 0 on success, an error code from a
+// failing API call, or the number of mismatching dimensions.
+static int run_get_global_offset_launches( cl_context context, cl_command_queue queue, clKernelWrapper &kernel,
+                                           clMemWrapper streams[], MTdata seed )
+{
+    int error;
+    size_t    threads[] = {1,1,1}, localThreads[] = {1,1,1}, offsets[] = {0,0,0};
+    cl_int outOffsets[ 3 ];
+
+    for( int test = 0; test < NUM_TESTS; test++ )
+    {
+        // Choose a random combination of thread size, but in total less than MAX_TEST_ITEMS
+        threads[ 0 ] = random_in_range( 1, 32, seed );
+        threads[ 1 ] = random_in_range( 1, 16, seed );
+        threads[ 2 ] = random_in_range( 1, MAX_TEST_ITEMS / (int)( threads[ 0 ] * threads[ 1 ] ), seed );
+
+        // Make sure we get the local thread count right
+        error = get_max_common_3D_work_group_size( context, kernel, threads, localThreads );
+        test_error( error, "Unable to determine local work group sizes" );
+
+        // Randomize some offsets
+        for( int j = 0; j < 3; j++ )
+            offsets[ j ] = random_in_range( 0, MAX_OFFSET, seed );
+
+        log_info( "\tTesting %ld,%ld,%ld (%ld,%ld,%ld) with offsets (%ld,%ld,%ld)...\n",
+                 threads[ 0 ], threads[ 1 ], threads[ 2 ], localThreads[ 0 ], localThreads[ 1 ], localThreads[ 2 ],
+                 offsets[ 0 ], offsets[ 1 ], offsets[ 2 ] );
+
+        // Now set up and run
+        error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] );
+        test_error( error, "Unable to set indexed kernel arguments" );
+
+        error = clEnqueueNDRangeKernel( queue, kernel, 3, offsets, threads, localThreads, 0, NULL, NULL );
+        test_error( error, "Kernel execution failed" );
+
+        // Read our results back now (blocking read)
+        error = clEnqueueReadBuffer( queue, streams[ 0 ], CL_TRUE, 0, sizeof( outOffsets ), outOffsets, 0, NULL, NULL );
+        test_error( error, "Unable to get result data" );
+
+        // And check!
+        int errors = 0;
+        for( int j = 0; j < 3; j++ )
+        {
+            if( outOffsets[ j ] != (cl_int)offsets[ j ] )
+            {
+                log_error( "ERROR: get_global_offset( %d ) did not return expected value (expected %ld, got %d)\n", j, offsets[ j ], outOffsets[ j ] );
+                errors++;
+            }
+        }
+        if( errors > 0 )
+            return errors;
+    }
+
+    // All done!
+    return 0;
+}
+
+// Conformance test: get_global_offset() must return the global work offset
+// passed to clEnqueueNDRangeKernel. Builds the get_offset_test kernel, creates
+// a three-element result buffer and runs the randomized launches. deviceID and
+// num_elements are not used. Returns 0 on success, non-zero on any failure.
+int test_get_global_offset(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    clProgramWrapper program;
+    clKernelWrapper kernel;
+    clMemWrapper streams[ 1 ];
+
+    int error;
+    cl_int initOffsets[ 3 ];
+
+
+    // Create the kernel
+    if( create_single_kernel_helper( context, &program, &kernel, 1, get_offset_test, "test" ) != 0 )
+    {
+        return -1;
+    }
+
+    // Create the output stream, pre-filled with 0xff bytes
+    memset( initOffsets, 0xff, sizeof( initOffsets ) );
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR), sizeof( initOffsets ), initOffsets, &error );
+    test_error( error, "Unable to create control ID buffer" );
+
+    // Run a few different times; the generator is released on every path
+    MTdata seed = init_genrand( gRandomSeed );
+    error = run_get_global_offset_launches( context, queue, kernel, streams, seed );
+    free_mtdata(seed);
+
+    return error;
+}
+
--- a/test_conformance/compatibility/test_conformance/basic/test_hiloeo.c
+++ b/test_conformance/compatibility/test_conformance/basic/test_hiloeo.c
@@ -0,0 +1,421 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../../test_common/harness/compat.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <limits.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+
+#include "procs.h"
+
+/* Lane-mapping helpers: for result lane `index`, each returns the lane of a
+ * vectorSize-wide source vector that the matching selector reads. */
+
+/* .hi reads the upper half of the vector. */
+int hi_offset( int index, int vectorSize)
+{
+    const int half = vectorSize / 2;
+    return half + index;
+}
+
+/* .lo reads the lower half; the vector width does not matter. */
+int lo_offset( int index, int vectorSize)
+{
+    (void)vectorSize;
+    return index;
+}
+
+/* .even reads lanes 0, 2, 4, ... */
+int even_offset( int index, int vectorSize )
+{
+    (void)vectorSize;
+    return 2 * index;
+}
+
+/* .odd reads lanes 1, 3, 5, ... */
+int odd_offset( int index, int vectorSize )
+{
+    (void)vectorSize;
+    return 2 * index + 1;
+}
+
+// Signature shared by the lane-mapping helpers (hi/lo/even/odd_offset).
+typedef int (*OffsetFunc)( int index, int vectorSize );
+// Lane-mapping helper per operator; same order as operatorToUse_names, both
+// are indexed by the operatorToUse loop variable.
+static const OffsetFunc offsetFuncs[4] = { hi_offset, lo_offset, even_offset, odd_offset };
+// NOTE(review): verifyFunc is not referenced in the visible part of this file.
+typedef int (*verifyFunc)( const void *, const void *, const void *, int n, const char *sizeName );
+// Vector component selectors under test.
+static const char *operatorToUse_names[] = { "hi", "lo", "even", "odd" };
+// OpenCL C scalar type names under test; kSizes below is parallel to this table.
+static const char *test_str_names[] = { "char", "uchar", "short", "ushort", "int", "uint", "long", "ulong", "float", "double" };
+
+// The next three tables are parallel and indexed by the same "vector size index":
+// element count of the input vector, ...
+static const unsigned int vector_sizes[] =     { 1, 2, 3, 4, 8, 16};
+// ... its stride in elements between consecutive vectors in a buffer
+// (a 3-element vector occupies 4 elements), ...
+static const unsigned int vector_aligns[] =    { 1, 2, 4, 4, 8, 16};
+// ... and the vector size index of the result type of hi/lo/even/odd.
+static const unsigned int out_vector_idx[] =   { 0, 0, 1, 1, 3, 4};
+// if input is size vector_sizes[i], output is size
+// vector_sizes[out_vector_idx[i]]
+// input type name is strcat(gentype, vector_size_names[i]);
+// and output type name is
+// strcat(gentype, vector_size_names[out_vector_idx[i]]);
+// Reverse lookup from element count to vector size index (-1 where no such
+// vector size exists). NOTE(review): not referenced in the visible code.
+static const int size_to_idx[] = {-1,0,1,2,3,-1,-1,-1,4,
+    -1,-1,-1,-1,-1,-1,-1,5};
+// Type-name suffix for each vector size index ("" for the scalar type).
+static const char *vector_size_names[] = { "", "2", "3", "4", "8", "16"};
+
+// Size in bytes of each entry of test_str_names.
+static const size_t  kSizes[] = { 1, 1, 2, 2, 4, 4, 8, 8, 4, 8 };
+// Host-side reference check, defined after test_hiloeo.
+static int CheckResults( void *in, void *out, size_t elementCount, int type, int vectorSize, int operatorToUse );
+
+// Conformance test for the .hi, .lo, .even and .odd vector selectors.
+//
+// For every scalar type in test_str_names, every selector and every vector
+// width from 2 to 16, a one-line kernel is generated that applies the selector
+// to srcA[tid] and stores the result in dst[tid]. Each combination is built
+// twice: once selecting from srcA[tid] directly and once from a vector literal
+// rebuilt from its components. The device output is compared against a host
+// reference by CheckResults().
+//
+//   device, context, queue - OpenCL objects supplied by the test harness
+//   n_elems                - the input holds 4 * n_elems random cl_int values
+//
+// Returns 0 on success and -1 on the first failure.
+int test_hiloeo(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    cl_int *input_ptr, *output_ptr, *p;
+    int err;
+    cl_uint i;
+    int hasDouble = is_extension_available( device, "cl_khr_fp64" );
+    cl_uint vectorSize, operatorToUse;
+    cl_uint type;
+    MTdata d;
+
+    int expressionMode;
+    int numExpressionModes = 2;
+
+    size_t length = sizeof(cl_int) * 4 * n_elems;
+
+    input_ptr   = (cl_int*)malloc(length);
+    output_ptr  = (cl_int*)malloc(length);
+    if( !input_ptr || !output_ptr )
+    {
+        log_error( "Unable to allocate host buffers\n" );
+        free( input_ptr );
+        free( output_ptr );
+        return -1;
+    }
+
+    // The same random bit pattern is reinterpreted as each tested type
+    p = input_ptr;
+    d = init_genrand( gRandomSeed );
+    for (i=0; i<4 * (cl_uint) n_elems; i++)
+        p[i] = genrand_int32(d);
+    free_mtdata(d); d = NULL;
+
+    for( type = 0; type < sizeof( test_str_names ) / sizeof( test_str_names[0] ); type++ )
+    {
+        // Note: restrict the element count here so we don't end up overrunning the output buffer if we're compensating for 32-bit writes
+        size_t elementCount = length / kSizes[type];
+        cl_mem streams[2];
+
+        // skip double if unavailable
+        if( !hasDouble && ( 0 == strcmp( test_str_names[type], "double" )))
+            continue;
+
+        // skip long and ulong if 64-bit integers are unavailable; the name can
+        // only match one of the two, so the tests must be OR-ed
+        if( !gHasLong &&
+            (( 0 == strcmp( test_str_names[type], "long" )) ||
+             ( 0 == strcmp( test_str_names[type], "ulong" ))))
+            continue;
+
+        log_info( "%s", test_str_names[type] );
+        fflush( stdout );
+
+        // Set up data streams for the type
+        streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
+        if (!streams[0])
+        {
+            log_error("clCreateBuffer failed\n");
+            return -1;
+        }
+        streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
+        if (!streams[1])
+        {
+            log_error("clCreateBuffer failed\n");
+            return -1;
+        }
+
+        err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr, 0, NULL, NULL);
+        if (err != CL_SUCCESS)
+        {
+            log_error("clEnqueueWriteBuffer failed\n");
+            return -1;
+        }
+
+        for( operatorToUse = 0; operatorToUse < sizeof( operatorToUse_names ) / sizeof( operatorToUse_names[0] ); operatorToUse++ )
+        {
+            log_info( " %s", operatorToUse_names[ operatorToUse ] );
+            fflush( stdout );
+            // Starts at index 1: the scalar type (index 0) has no selectors
+            for( vectorSize = 1; vectorSize < sizeof( vector_size_names ) / sizeof( vector_size_names[0] ); vectorSize++ ) {
+                for(expressionMode = 0; expressionMode < numExpressionModes; ++expressionMode) {
+
+                    cl_program program = NULL;
+                    cl_kernel kernel = NULL;
+                    cl_uint outVectorSize = out_vector_idx[vectorSize];
+                    char expression[1024];
+
+                    // Kernel source fragments; `expression` is filled in below,
+                    // before the array is handed to the compiler
+                    const char *source[] = {
+                        "", // optional pragma string
+                        "__kernel void test_", operatorToUse_names[ operatorToUse ], "_", test_str_names[type], vector_size_names[vectorSize],
+                        "(__global ", test_str_names[type], vector_size_names[vectorSize],
+                        " *srcA, __global ", test_str_names[type], vector_size_names[outVectorSize],
+                        " *dst)\n"
+                        "{\n"
+                        "    int  tid = get_global_id(0);\n"
+                        "\n"
+                        "    ", test_str_names[type],
+                        vector_size_names[out_vector_idx[vectorSize]],
+                        " tmp = ", expression, ".", operatorToUse_names[ operatorToUse ], ";\n"
+                        "    dst[tid] = tmp;\n"
+                        "}\n"
+                    };
+
+                    // Mode 0 selects from the loaded vector, mode 1 from a
+                    // vector literal rebuilt component by component
+                    if(expressionMode == 0) {
+                        sprintf(expression, "srcA[tid]");
+                    } else if(expressionMode == 1) {
+                        switch(vector_sizes[vectorSize]) {
+                            case 16:
+                                sprintf(expression,
+                                        "((%s16)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2, srcA[tid].s3, srcA[tid].s4, srcA[tid].s5, srcA[tid].s6, srcA[tid].s7, srcA[tid].s8, srcA[tid].s9, srcA[tid].sA, srcA[tid].sB, srcA[tid].sC, srcA[tid].sD, srcA[tid].sE, srcA[tid].sf))",
+                                        test_str_names[type]
+                                        );
+                                break;
+                            case 8:
+                                sprintf(expression,
+                                        "((%s8)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2, srcA[tid].s3, srcA[tid].s4, srcA[tid].s5, srcA[tid].s6, srcA[tid].s7))",
+                                        test_str_names[type]
+                                        );
+                                break;
+                            case 4:
+                                sprintf(expression,
+                                        "((%s4)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2, srcA[tid].s3))",
+                                        test_str_names[type]
+                                        );
+                                break;
+                            case 3:
+                                sprintf(expression,
+                                        "((%s3)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2))",
+                                        test_str_names[type]
+                                        );
+                                break;
+                            case 2:
+                                sprintf(expression,
+                                        "((%s2)(srcA[tid].s0, srcA[tid].s1))",
+                                        test_str_names[type]
+                                        );
+                                break;
+                            default :
+                                sprintf(expression, "srcA[tid]");
+                                log_info("Default\n");
+                        }
+                    } else {
+                        sprintf(expression, "srcA[tid]");
+                    }
+
+                    if (0 == strcmp( test_str_names[type], "double" ))
+                        source[0] = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
+
+                    char kernelName[128];
+                    snprintf( kernelName, sizeof( kernelName ), "test_%s_%s%s", operatorToUse_names[ operatorToUse ], test_str_names[type], vector_size_names[vectorSize] );
+                    err = create_single_kernel_helper(context, &program, &kernel, sizeof( source ) / sizeof( source[0] ), source, kernelName );
+                    if (err)
+                        return -1;
+
+                    err  = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
+                    err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
+                    if (err != CL_SUCCESS)
+                    {
+                        log_error("clSetKernelArgs failed\n");
+                        return -1;
+                    }
+
+                    //Wipe the output buffer clean
+                    uint32_t pattern = 0xdeadbeef;
+                    memset_pattern4( output_ptr, &pattern, length );
+                    err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
+                    if (err != CL_SUCCESS)
+                    {
+                        log_error("clEnqueueWriteBuffer failed\n");
+                        return -1;
+                    }
+
+                    // One work-item per input vector that fits in the buffer
+                    size_t size = elementCount / (vector_aligns[vectorSize]);
+                    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &size, NULL, 0, NULL, NULL);
+                    if (err != CL_SUCCESS)
+                    {
+                        log_error("clEnqueueNDRangeKernel failed\n");
+                        return -1;
+                    }
+
+                    err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
+                    if (err != CL_SUCCESS)
+                    {
+                        log_error("clEnqueueReadBuffer failed\n");
+                        return -1;
+                    }
+
+                    char *inP = (char *)input_ptr;
+                    char *outP = (char *)output_ptr;
+                    // outVectorSize equals out_vector_idx[vectorSize], so this
+                    // adjustment currently evaluates to zero
+                    outP += kSizes[type] * ( ( vector_sizes[outVectorSize] ) -
+                                            ( vector_sizes[ out_vector_idx[vectorSize] ] ) );
+                    // was                outP += kSizes[type] * ( ( 1 << outVectorSize ) - ( 1 << ( vectorSize - 1 ) ) );
+                    // Verify one vector at a time, stepping by the aligned strides
+                    for( size_t e = 0; e < size; e++ )
+                    {
+                        if( CheckResults( inP, outP, 1, type, vectorSize, operatorToUse ) ) {
+
+                            log_info("e is %d\n", (int)e);
+                            fflush(stdout);
+                            // break;
+                            return -1;
+                        }
+                        inP += kSizes[type] * ( vector_aligns[vectorSize] );
+                        outP += kSizes[type] * ( vector_aligns[outVectorSize] );
+                    }
+
+                    clReleaseKernel( kernel );
+                    clReleaseProgram( program );
+                    log_info( "." );
+                    fflush( stdout );
+                }
+            }
+        }
+
+        clReleaseMemObject( streams[0] );
+        clReleaseMemObject( streams[1] );
+        log_info( "done\n" );
+    }
+
+    log_info("HiLoEO test passed\n");
+
+    free(input_ptr);
+    free(output_ptr);
+
+    return err;
+}
+
+// Host-side reference check for one hi/lo/even/odd selection.
+//
+//   in            - input vectors as read by the kernel
+//   out           - result vectors as written by the kernel
+//   elementCount  - number of consecutive vectors to check
+//   type          - index into test_str_names / kSizes
+//   vectorSize    - vector size index of the input type
+//   operatorToUse - index into offsetFuncs / operatorToUse_names
+//
+// The expected result is gathered lane by lane with the operator's offset
+// function and compared with the device output. Returns 0 on a match, -1 on a
+// mismatch (after logging input and output) and -2 for an unknown element size.
+static int CheckResults( void *in, void *out, size_t elementCount, int type, int vectorSize, int operatorToUse )
+{
+    // Scratch space for the expected result: at most 8 lanes of 8 bytes
+    cl_ulong  array[8];
+    void *p = array;
+    size_t halfVectorSize  = vector_sizes[out_vector_idx[vectorSize]];
+    size_t cmpVectorSize =  vector_sizes[out_vector_idx[vectorSize]];
+    // was 1 << (vectorSize-1);
+    OffsetFunc f = offsetFuncs[ operatorToUse ];
+    size_t elementSize =  kSizes[type];
+
+    if(vector_size_names[vectorSize][0] == '3') {
+        if(operatorToUse_names[operatorToUse][0] == 'h' ||
+           operatorToUse_names[operatorToUse][0] == 'o') // hi or odd
+        {
+            cmpVectorSize = 1; // special case for vec3 ignored values
+        }
+    }
+
+    switch( elementSize )
+    {
+        case 1:
+        {
+            char *i = (char*)in;
+            char *o = (char*)out;
+            size_t j;
+            cl_uint k;
+
+            for( k = 0; k  < elementCount; k++ )
+            {
+                char *o2 = (char*)p;
+                for( j = 0; j < halfVectorSize; j++ )
+                    o2[j] = i[ f((int)j, (int)halfVectorSize*2) ];
+
+                if( memcmp( o, o2, elementSize * cmpVectorSize ) )
+                {
+                    log_info( "\n%d) Failure for %s%s.%s { %d", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] );
+                    for( j = 1; j < halfVectorSize * 2; j++ )
+                        log_info( ", %d", i[j] );
+                    log_info( " } --> { %d", o[0] );
+                    for( j = 1; j < halfVectorSize; j++ )
+                        log_info( ", %d", o[j] );
+                    log_info( " }\n" );
+                    return -1;
+                }
+                i += 2 * halfVectorSize;
+                o += halfVectorSize;
+            }
+        }
+            break;
+
+        case 2:
+        {
+            short *i = (short*)in;
+            short *o = (short*)out;
+            size_t j;
+            cl_uint k;
+
+            for( k = 0; k  < elementCount; k++ )
+            {
+                short *o2 = (short*)p;
+                for( j = 0; j < halfVectorSize; j++ )
+                    o2[j] = i[ f((int)j, (int)halfVectorSize*2) ];
+
+                if( memcmp( o, o2, elementSize * cmpVectorSize ) )
+                {
+                    log_info( "\n%d) Failure for %s%s.%s { %d", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] );
+                    for( j = 1; j < halfVectorSize * 2; j++ )
+                        log_info( ", %d", i[j] );
+                    log_info( " } --> { %d", o[0] );
+                    for( j = 1; j < halfVectorSize; j++ )
+                        log_info( ", %d", o[j] );
+                    log_info( " }\n" );
+                    return -1;
+                }
+                i += 2 * halfVectorSize;
+                o += halfVectorSize;
+            }
+        }
+            break;
+
+        case 4:
+        {
+            int *i = (int*)in;
+            int *o = (int*)out;
+            size_t j;
+            cl_uint k;
+
+            for( k = 0; k  < elementCount; k++ )
+            {
+                int *o2 = (int *)p;
+                for( j = 0; j < halfVectorSize; j++ )
+                    o2[j] = i[ f((int)j, (int)halfVectorSize*2) ];
+
+                // Compared lane by lane (not with one memcmp) so that float
+                // NaNs can be accepted individually
+                for( j = 0; j < cmpVectorSize; j++ )
+                {
+                    /* Allow float nans to be binary different */
+                    if( memcmp( &o[j], &o2[j], elementSize ) && !((strcmp(test_str_names[type], "float") == 0) && isnan(((float *)o)[j]) && isnan(((float *)o2)[j])))
+                    {
+                        size_t d; // separate index so the dump leaves the lane counter alone
+
+                        log_info( "\n%d) Failure for %s%s.%s { 0x%8.8x", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] );
+                        for( d = 1; d < halfVectorSize * 2; d++ )
+                            log_info( ", 0x%8.8x", i[d] );
+                        log_info( " } --> { 0x%8.8x", o[0] );
+                        for( d = 1; d < halfVectorSize; d++ )
+                            log_info( ", 0x%8.8x", o[d] );
+                        log_info( " }\n" );
+                        return -1;
+                    }
+                }
+                i += 2 * halfVectorSize;
+                o += halfVectorSize;
+            }
+        }
+            break;
+
+        case 8:
+        {
+            cl_ulong *i = (cl_ulong*)in;
+            cl_ulong *o = (cl_ulong*)out;
+            size_t j;
+            cl_uint k;
+
+            for( k = 0; k  < elementCount; k++ )
+            {
+                cl_ulong *o2 = (cl_ulong*)p;
+                for( j = 0; j < halfVectorSize; j++ )
+                    o2[j] = i[ f((int)j, (int)halfVectorSize*2) ];
+
+                if( memcmp( o, o2, elementSize * cmpVectorSize ) )
+                {
+                    log_info( "\n%d) Failure for %s%s.%s { 0x%16.16llx", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] );
+                    for( j = 1; j < halfVectorSize * 2; j++ )
+                        log_info( ", 0x%16.16llx", i[j] );
+                    log_info( " } --> { 0x%16.16llx", o[0] );
+                    for( j = 1; j < halfVectorSize; j++ )
+                        log_info( ", 0x%16.16llx", o[j] );
+                    log_info( " }\n" );
+                    return -1;
+                }
+                i += 2 * halfVectorSize;
+                o += halfVectorSize;
+            }
+        }
+            break;
+
+        default:
+            log_info( "Internal error. Unknown data type\n" );
+            return -2;
+    }
+
+    return 0;
+}
+
+
+
--- a/test_conformance/compatibility/test_conformance/basic/test_hostptr.c
+++ b/test_conformance/compatibility/test_conformance/basic/test_hostptr.c
@@ -0,0 +1,276 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../../test_common/harness/compat.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+
+#include "procs.h"
+
+const char *hostptr_kernel_code = // OpenCL C source: each work-item stores srcA[tid] + srcB[tid] into dst[tid]
+"__kernel void test_hostptr(__global float *srcA, __global float *srcB, __global float *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = srcA[tid] + srcB[tid];\n"
+"}\n";
+
+static const float    MAX_ERR = 1e-5f; // NOTE(review): not referenced anywhere in test_hostptr.c; verify_hostptr() compares exactly
+
+// Checks that outptr[k] equals inptrA[k] + inptrB[k] (computed on the host) for all n elements.
+// The comparison is exact, with no tolerance. Returns 0 on a full match, -1 at the first mismatch.
+static int verify_hostptr(cl_float *inptrA, cl_float *inptrB, cl_float *outptr, int n)
+{
+    int idx = 0;
+
+    while (idx < n)
+    {
+        cl_float expected = inptrA[idx] + inptrB[idx];
+        if (expected != outptr[idx])
+            return -1;
+        idx++;
+    }
+    return 0;
+}
+
+// Fills ptr[0..count) with get_random_float() draws requested between -2^32 and 2^32.
+static void make_random_data(unsigned count, float *ptr, MTdata d)
+{
+    const float limit = MAKE_HEX_FLOAT( 0x1.0p32f, 0x1, 32);
+    while (count-- > 0) *ptr++ = get_random_float(-limit, limit, d);
+}
+
+// Allocates a w x h image of 4 bytes per pixel and fills every byte from genrand_int32(d).
+// Returns the malloc'd buffer (caller frees), or NULL without writing if the allocation fails.
+static unsigned char *
+generate_rgba8_image(int w, int h, MTdata d)
+{
+    int             i, byte_count = w * h * 4;
+    unsigned char   *ptr = (unsigned char*)malloc(byte_count);
+    for (i = 0; ptr != NULL && i < byte_count; i++)
+        ptr[i] = (unsigned char)genrand_int32(d);
+    return ptr;
+}
+
+// Overwrites the w*h*4 bytes at ptr with values from genrand_int32(d) and returns ptr.
+static unsigned char *
+randomize_rgba8_image(unsigned char *ptr, int w, int h, MTdata d)
+{
+    unsigned char   *cursor = ptr;
+    int             remaining = w*h*4;
+    for (; remaining > 0; remaining--)
+        *cursor++ = (unsigned char)genrand_int32(d);
+    return ptr;
+}
+
+// Byte-wise comparison of two w x h buffers of 4 bytes per pixel (w*h*4 bytes each).
+// Returns 0 when they are identical and -1 as soon as one byte differs.
+static int
+verify_rgba8_image(unsigned char *image, unsigned char *outptr, int w, int h)
+{
+    int     total = w*h*4;
+    int     pos;
+
+    for (pos = 0; pos < total; pos++)
+        if (image[pos] != outptr[pos])
+            return -1;
+    return 0;
+}
+
+// Exercises CL_MEM_USE_HOST_PTR and CL_MEM_COPY_HOST_PTR memory objects: runs an add kernel over
+// host-backed buffers and verifies the mapped result, then copies RGBA8 images and verifies them
+// through clEnqueueMapImage and clEnqueueReadImage. Returns 0 when every check passes.
+// NOTE(review): the `return -1` paths (and presumably test_error) leave before the cleanup at the bottom.
+int
+test_hostptr(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+{
+    cl_float            *input_ptr[2], *output_ptr;
+    cl_program            program;
+    cl_kernel           kernel;
+    size_t              threads[3]={0,0,0};
+    cl_image_format     img_format;
+    cl_uchar            *rgba8_inptr, *rgba8_outptr;
+    void                *lock_buffer;
+    int                 img_width = 512;
+    int                 img_height = 512;
+    cl_int              err;
+    MTdata              d;
+    RoundingMode        oldRoundMode;
+    int                    isRTZ = 0;
+
+    // Block to mark deletion of streams before deletion of host_ptr
+    {
+        clMemWrapper        streams[7];
+
+        PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
+
+        // Alloc buffers
+        input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
+        input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
+        output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
+
+        d = init_genrand( gRandomSeed );
+        rgba8_inptr = (cl_uchar *)generate_rgba8_image(img_width, img_height, d);
+        rgba8_outptr = (cl_uchar *)malloc(sizeof(cl_uchar) * 4 * img_width * img_height);
+
+        // Random data
+        make_random_data(num_elements, input_ptr[0], d);
+        make_random_data(num_elements, input_ptr[1], d);
+
+        // Create host-side input
+        streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_USE_HOST_PTR), sizeof(cl_float) * num_elements, input_ptr[0], &err);
+        test_error(err, "clCreateBuffer 0 failed");
+
+        // Create a copied input
+        streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_float) * num_elements, input_ptr[1], &err);
+        test_error(err, "clCreateBuffer 1 failed");
+
+        // Create a host-side output
+        streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_USE_HOST_PTR), sizeof(cl_float) * num_elements, output_ptr, &err);
+        test_error(err, "clCreateBuffer 2 failed");
+
+        // All four images share one RGBA / UNORM_INT8 format, so it is described once.
+        img_format.image_channel_order = CL_RGBA;
+        img_format.image_channel_data_type = CL_UNORM_INT8;
+
+        // Create a host-side input
+        streams[3] = create_image_2d(context, (cl_mem_flags)(CL_MEM_USE_HOST_PTR), &img_format, img_width, img_height, 0, rgba8_inptr, &err);
+        test_error(err, "create_image_2d 3 failed");
+
+        // Create a copied input
+        streams[4] = create_image_2d(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), &img_format, img_width, img_height, 0, rgba8_inptr, &err);
+        test_error(err, "create_image_2d 4 failed");
+
+        // Create a host-side output
+        streams[5] = create_image_2d(context, (cl_mem_flags)(CL_MEM_USE_HOST_PTR), &img_format, img_width, img_height, 0, rgba8_outptr, &err);
+        test_error(err, "create_image_2d 5 failed");
+
+        // Create a copied output
+        streams[6] = create_image_2d(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), &img_format, img_width, img_height, 0, rgba8_outptr, &err);
+        test_error(err, "create_image_2d 6 failed");
+
+        err = create_single_kernel_helper(context, &program, &kernel,1, &hostptr_kernel_code, "test_hostptr" );
+        test_error(err, "create_single_kernel_helper failed");
+
+        // Execute kernel
+        err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
+        err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
+        err |= clSetKernelArg(kernel, 2, sizeof streams[2], &streams[2]);
+        test_error(err, "clSetKernelArg failed");
+
+        threads[0] = (size_t)num_elements;
+        err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
+        test_error(err, "clEnqueueNDRangeKernel failed");
+
+        cl_float *data = (cl_float*) clEnqueueMapBuffer( queue, streams[2], CL_TRUE, CL_MAP_READ, 0, sizeof(cl_float) * num_elements, 0, NULL, NULL, &err );
+        test_error( err, "clEnqueueMapBuffer failed" );
+
+        //If we only support rtz mode
+        if( CL_FP_ROUND_TO_ZERO == get_default_rounding_mode(device) && gIsEmbedded)
+        {
+            // Switch modes exactly once so oldRoundMode keeps the mode that was active on entry.
+            oldRoundMode = set_round(kRoundTowardZero, kfloat);
+            isRTZ = 1;
+        }
+
+        // Verify that we got the expected results back on the host side
+        int mismatch = verify_hostptr(input_ptr[0], input_ptr[1], data, num_elements);
+
+        // Restore the rounding mode and unmap before acting on the result, so that neither
+        // step is skipped when the verification fails.
+        if (isRTZ)
+            set_round(oldRoundMode, kfloat);
+        err = clEnqueueUnmapMemObject( queue, streams[2], data, 0, NULL, NULL );
+
+        if (mismatch)
+        {
+            log_error("Checking mapped data for kernel executed with CL_MEM_COPY_HOST_PTR and CL_MEM_USE_HOST_PTR inputs "
+                      "and a CL_MEM_USE_HOST_PTR output did not return the expected results.\n");
+            return -1;
+        }
+        log_info("Checking mapped data for kernel executed with CL_MEM_COPY_HOST_PTR and CL_MEM_USE_HOST_PTR inputs "
+                 "and a CL_MEM_USE_HOST_PTR output returned the expected results.\n");
+        test_error( err, "clEnqueueUnmapMemObject failed" );
+
+        size_t origin[3]={0,0,0}, region[3]={img_width, img_height, 1};
+        randomize_rgba8_image(rgba8_outptr, img_width, img_height, d);
+        free_mtdata(d); d = NULL;
+
+        // Copy from host-side to host-side
+        log_info("clEnqueueCopyImage from CL_MEM_USE_HOST_PTR to CL_MEM_USE_HOST_PTR...\n");
+        err = clEnqueueCopyImage(queue, streams[3], streams[5],
+                                 origin, origin, region,  0, NULL, NULL);
+        test_error(err, "clEnqueueCopyImage failed");
+        log_info("clEnqueueCopyImage from CL_MEM_USE_HOST_PTR to CL_MEM_USE_HOST_PTR image passed.\n");
+
+        // test the lock buffer interface
+        log_info("Mapping the CL_MEM_USE_HOST_PTR image with clEnqueueMapImage...\n");
+        size_t row_pitch;
+        lock_buffer = clEnqueueMapImage(queue, streams[5], CL_TRUE,
+                                        CL_MAP_READ, origin, region,
+                                        &row_pitch, NULL,
+                                        0, NULL, NULL, &err);
+        test_error(err, "clEnqueueMapImage failed");
+
+        err = verify_rgba8_image(rgba8_inptr, (unsigned char*)lock_buffer, img_width, img_height);
+        if (err != CL_SUCCESS)
+        {
+            log_error("verify_rgba8_image FAILED after clEnqueueMapImage\n");
+            return -1;
+        }
+        log_info("verify_rgba8_image passed after clEnqueueMapImage\n");
+
+        err = clEnqueueUnmapMemObject(queue, streams[5], lock_buffer, 0, NULL, NULL);
+        test_error(err, "clEnqueueUnmapMemObject failed");
+
+        // Copy host-side to device-side and read back
+        log_info("clEnqueueCopyImage CL_MEM_USE_HOST_PTR to CL_MEM_COPY_HOST_PTR...\n");
+        err = clEnqueueCopyImage(queue, streams[3], streams[6],
+                                 origin, origin, region,
+                                 0, NULL, NULL);
+        test_error(err, "clEnqueueCopyImage failed");
+
+        err = clEnqueueReadImage(queue, streams[6], CL_TRUE, origin, region, 4*img_width, 0, rgba8_outptr, 0, NULL, NULL);
+        test_error(err, "clEnqueueReadImage failed");
+
+        err = verify_rgba8_image(rgba8_inptr, rgba8_outptr, img_width, img_height);
+        if (err != CL_SUCCESS)
+        {
+            log_error("verify_rgba8_image FAILED after clEnqueueCopyImage, clEnqueueReadImage\n");
+            return -1;
+        }
+        log_info("verify_rgba8_image passed after clEnqueueCopyImage, clEnqueueReadImage\n");
+    }
+    // cleanup
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    free(input_ptr[0]);
+    free(input_ptr[1]);
+    free(output_ptr);
+
+    free(rgba8_inptr);
+    free(rgba8_outptr);
+
+    return err;
+}
+
+
+
+
+
--- a/test_conformance/compatibility/test_conformance/basic/test_if.c
+++ b/test_conformance/compatibility/test_conformance/basic/test_if.c
@@ -0,0 +1,165 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../../test_common/harness/compat.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+
+#include "procs.h"
+
+const char *conditional_kernel_code = // OpenCL C source: maps src values 0..7 to fixed constants, anything else to 0x7FFFFFFF
+"__kernel void test_if(__global int *src, __global int *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    if (src[tid] == 0)\n"
+"        dst[tid] = 0x12345678;\n"
+"    else if (src[tid] == 1)\n"
+"        dst[tid] = 0x23456781;\n"
+"    else if (src[tid] == 2)\n"
+"        dst[tid] = 0x34567812;\n"
+"    else if (src[tid] == 3)\n"
+"        dst[tid] = 0x45678123;\n"
+"    else if (src[tid] == 4)\n"
+"        dst[tid] = 0x56781234;\n"
+"    else if (src[tid] == 5)\n"
+"        dst[tid] = 0x67812345;\n"
+"    else if (src[tid] == 6)\n"
+"        dst[tid] = 0x78123456;\n"
+"    else if (src[tid] == 7)\n"
+"        dst[tid] = 0x81234567;\n"
+"    else\n"
+"        dst[tid] = 0x7FFFFFFF;\n"
+"\n"
+"}\n";
+
+const int results[] = { // expected dst value for src == index (0..7), in the same order as the kernel's if/else-if chain
+    0x12345678,
+    0x23456781,
+    0x34567812,
+    0x45678123,
+    0x56781234,
+    0x67812345,
+    0x78123456,
+    0x81234567,
+};
+
+// Checks the test_if kernel output: inputs 0..7 must yield results[input], every other input
+// 0x7FFFFFFF. Logs the outcome; returns 0 if all n elements match, -1 at the first mismatch.
+int
+verify_if(int *inptr, int *outptr, int n)
+{
+    int     r, i;
+    for (i=0; i<n; i++)
+    {
+        // Bound both ends: a negative input takes the kernel's final else and must not index results[].
+        if (inptr[i] >= 0 && inptr[i] <= 7)
+            r = results[inptr[i]];
+        else
+            r = 0x7FFFFFFF;
+        if (r != outptr[i])
+        {
+            log_error("IF test failed\n");
+            return -1;
+        }
+    }
+    log_info("IF test passed\n");
+    return 0;
+}
+
+// Runs the test_if kernel on num_elements random inputs and checks the output with verify_if().
+// Returns verify_if()'s result, or -1 on an allocation/OpenCL failure; everything acquired is released at cleanup.
+int test_if(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+{
+    cl_mem streams[2] = { NULL, NULL };
+    cl_int *input_ptr, *output_ptr;
+    cl_program program = NULL;
+    cl_kernel kernel = NULL;
+    size_t threads[1];
+    int err, i;
+    int result = -1;    // stays -1 unless verify_if() gets to run
+    MTdata d = init_genrand( gRandomSeed );
+    size_t length = sizeof(cl_int) * num_elements;
+    input_ptr  = (cl_int*)malloc(length);
+    output_ptr = (cl_int*)malloc(length);
+    if (!input_ptr || !output_ptr)
+    {
+        log_error("malloc failed\n");
+        goto cleanup;
+    }
+
+    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
+    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
+    if (!streams[0] || !streams[1])
+    {
+        log_error("clCreateBuffer failed\n");
+        goto cleanup;
+    }
+
+    for (i=0; i<num_elements; i++)
+        input_ptr[i] = (int)get_random_float(0, 32, d);
+
+    err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr, 0, NULL, NULL);
+    if (err != CL_SUCCESS)
+    {
+        log_error("clEnqueueWriteBuffer failed\n");
+        goto cleanup;
+    }
+
+    err = create_single_kernel_helper(context, &program, &kernel, 1, &conditional_kernel_code, "test_if" );
+    if (err)
+        goto cleanup;
+
+    err  = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
+    err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
+    if (err != CL_SUCCESS)
+    {
+        log_error("clSetKernelArgs failed\n");
+        goto cleanup;
+    }
+
+    threads[0] = (unsigned int)num_elements;
+    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL);
+    if (err != CL_SUCCESS)
+    {
+        log_error("clEnqueueNDRangeKernel failed\n");
+        goto cleanup;
+    }
+
+    err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
+    if (err != CL_SUCCESS)
+    {
+        log_error("clReadArray failed\n");
+        goto cleanup;
+    }
+
+    result = verify_if(input_ptr, output_ptr, num_elements);
+
+cleanup:
+    free_mtdata(d);
+    if (streams[0]) clReleaseMemObject(streams[0]);
+    if (streams[1]) clReleaseMemObject(streams[1]);
+    if (kernel) clReleaseKernel(kernel);
+    if (program) clReleaseProgram(program);
+    free(input_ptr);
+    free(output_ptr);
+    return result;
+}
+
+
--- a/test_conformance/compatibility/test_conformance/basic/test_image_multipass.c
+++ b/test_conformance/compatibility/test_conformance/basic/test_image_multipass.c
@@ -0,0 +1,643 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../../test_common/harness/compat.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+
+#include "procs.h"
+
+static const char *image_to_image_kernel_integer_coord_code = // OpenCL C source: copies srcimg to dstimg per pixel, sampling with int2 coordinates
+"\n"
+"__kernel void image_to_image_copy(read_only image2d_t srcimg, write_only image2d_t dstimg, sampler_t sampler)\n"
+"{\n"
+"    int    tid_x = get_global_id(0);\n"
+"    int    tid_y = get_global_id(1);\n"
+"    float4 color;\n"
+"\n"
+"    color = read_imagef(srcimg, sampler, (int2)(tid_x, tid_y));\n"
+"    write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n"
+"\n"
+"}\n";
+
+static const char *image_to_image_kernel_float_coord_code = // OpenCL C source: copies srcimg to dstimg per pixel, sampling with float2 coordinates
+"\n"
+"__kernel void image_to_image_copy(read_only image2d_t srcimg, write_only image2d_t dstimg, sampler_t sampler)\n"
+"{\n"
+"    int    tid_x = get_global_id(0);\n"
+"    int    tid_y = get_global_id(1);\n"
+"    float4 color;\n"
+"\n"
+"    color = read_imagef(srcimg, sampler, (float2)((float)tid_x, (float)tid_y));\n"
+"    write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n"
+"\n"
+"}\n";
+
+
+static const char *image_sum_kernel_integer_coord_code = // OpenCL C source: writes srcimg0 + srcimg1 to dstimg per pixel, sampling with int2 coordinates
+"\n"
+"__kernel void image_sum(read_only image2d_t srcimg0, read_only image2d_t srcimg1, write_only image2d_t dstimg, sampler_t sampler)\n"
+"{\n"
+"    int    tid_x = get_global_id(0);\n"
+"    int    tid_y = get_global_id(1);\n"
+"    float4 color0;\n"
+"    float4 color1;\n"
+"\n"
+"    color0 = read_imagef(srcimg0, sampler, (int2)(tid_x, tid_y));\n"
+"    color1 = read_imagef(srcimg1, sampler, (int2)(tid_x, tid_y));\n"
+"    write_imagef(dstimg, (int2)(tid_x, tid_y), color0 + color1);\n"
+"\n"
+"}\n";
+
+
+static const char *image_sum_kernel_float_coord_code = // OpenCL C source: writes srcimg0 + srcimg1 to dstimg per pixel, sampling with float2 coordinates
+"\n"
+"__kernel void image_sum(read_only image2d_t srcimg0, read_only image2d_t srcimg1, write_only image2d_t dstimg, sampler_t sampler)\n"
+"{\n"
+"    int    tid_x = get_global_id(0);\n"
+"    int    tid_y = get_global_id(1);\n"
+"    float4 color0;\n"
+"    float4 color1;\n"
+"\n"
+"    color0 = read_imagef(srcimg0, sampler, (float2)((float)tid_x, (float)tid_y));\n"
+"    color1 = read_imagef(srcimg1, sampler, (float2)((float)tid_x, (float)tid_y));\n"
+"    write_imagef(dstimg,(int2)(tid_x, tid_y), color0 + color1);\n"
+"\n"
+"}\n";
+
+
+// Allocates a w x h image with num_elements bytes per pixel and sets every byte to value.
+// Returns the malloc'd buffer (caller frees), or NULL if the allocation fails.
+static unsigned char *
+generate_initial_byte_image(int w, int h, int num_elements, unsigned char value)
+{
+    size_t          byte_count = (size_t)(w * h * num_elements);
+    unsigned char   *ptr = (unsigned char*)malloc(byte_count);
+    if (ptr != NULL)
+        memset(ptr, value, byte_count);
+    return ptr;
+}
+
+// Builds the reference image for the multipass test: each output byte is the sum (wrapping as
+// unsigned char) of the matching byte across all num_inputs images in input_data.
+// Returns a malloc'd w*h*num_elements byte buffer (caller frees), or NULL if the allocation fails.
+static unsigned char *
+generate_expected_byte_image(unsigned char **input_data, int num_inputs, int w, int h, int num_elements)
+{
+    unsigned char   *ptr = (unsigned char*)malloc(w * h * num_elements);
+    int             i, j;
+
+    for (i = 0; ptr != NULL && i < w*h*num_elements; i++)
+    {
+        unsigned char sum = 0;
+        for (j = 0; j < num_inputs; j++)
+            sum += input_data[j][i];
+        ptr[i] = sum;
+    }
+
+    return ptr;
+}
+
+
+// Allocates a w x h image with num_elements bytes per pixel; each byte is genrand_int32(d) masked
+// to 0..31. Returns the malloc'd buffer (caller frees), or NULL if the allocation fails.
+static unsigned char *
+generate_byte_image(int w, int h, int num_elements, MTdata d)
+{
+    int             i, byte_count = w * h * num_elements;
+    unsigned char   *ptr = (unsigned char*)malloc(byte_count);
+    for (i = 0; ptr != NULL && i < byte_count; i++)
+        ptr[i] = (unsigned char)genrand_int32(d) & 31;
+    return ptr;
+}
+
+// Compares two images of w*h*num_elements bytes each, one byte at a time.
+// Returns 0 when every byte of outptr matches image.
+// Returns -1 as soon as a differing byte is found.
+static int
+verify_byte_image(unsigned char *image, unsigned char *outptr, int w, int h, int num_elements)
+{
+    const int   byte_count = w*h*num_elements;
+    int         offset = 0;
+
+    while (offset < byte_count && outptr[offset] == image[offset])
+        offset++;
+
+    return (offset < byte_count) ? -1 : 0;
+}
+
+// Multipass image test using integer sampling coordinates. The first of 8 random RGBA8 inputs is
+// copied into an accumulator image, the remaining inputs are added one kernel pass at a time while
+// alternating between two accumulators, and the final image is compared with a host-side byte sum.
+// Returns 0 on success and a non-zero value on a mismatch or a failed OpenCL call.
+// NOTE(review): the early-return error paths do not release every resource acquired before them.
+int
+test_image_multipass_integer_coord(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+{
+    int                 img_width = 512;
+    int                 img_height = 512;
+    cl_image_format     img_format;
+
+    int                 num_input_streams = 8;
+    cl_mem              *input_streams;
+    cl_mem                accum_streams[2];
+    unsigned char       *expected_output = NULL;    // allocated later by generate_expected_byte_image()
+    unsigned char       *output_ptr;
+    cl_kernel           kernel[2];
+    int                 err;
+
+    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
+
+    img_format.image_channel_order = CL_RGBA;
+    img_format.image_channel_data_type = CL_UNORM_INT8;
+
+    output_ptr = (unsigned char*)malloc(sizeof(unsigned char) * 4 * img_width * img_height);
+
+    // Create the accum images with initial data.
+    {
+        unsigned char          *initial_data;
+        cl_mem_flags        flags;
+
+        initial_data = generate_initial_byte_image(img_width, img_height, 4, 0xF0);
+        flags = (cl_mem_flags)(CL_MEM_READ_WRITE);
+
+        accum_streams[0] = create_image_2d(context, flags, &img_format, img_width, img_height, 0, NULL, NULL);
+        if (!accum_streams[0])
+        {
+            log_error("create_image_2d failed\n");
+            free(expected_output);
+            free(output_ptr);
+            return -1;
+        }
+
+        size_t origin[3] = {0, 0, 0}, region[3] = {img_width, img_height, 1};
+        err = clEnqueueWriteImage(queue, accum_streams[0], CL_TRUE,
+                                  origin, region, 0, 0,
+                                  initial_data, 0, NULL, NULL);
+        if (err)
+        {
+            log_error("clWriteImage failed: %d\n", err);
+            free(expected_output);
+            free(output_ptr);
+            return -1;
+        }
+
+        accum_streams[1] = create_image_2d(context, flags, &img_format, img_width, img_height, 0, NULL, NULL);
+        if (!accum_streams[1])
+        {
+            log_error("create_image_2d failed\n");
+            free(expected_output);
+            free(output_ptr);
+            return -1;
+        }
+        err = clEnqueueWriteImage(queue, accum_streams[1], CL_TRUE,
+                                  origin, region, 0, 0,
+                                  initial_data, 0, NULL, NULL);
+        if (err)
+        {
+            log_error("clWriteImage failed: %d\n", err);
+            free(expected_output);
+            free(output_ptr);
+            return -1;
+        }
+
+        free(initial_data);
+    }
+
+    // Set up the input data.
+    {
+        cl_mem_flags        flags;
+        unsigned char       **input_data = (unsigned char **)malloc(sizeof(unsigned char*) * num_input_streams);
+        MTdata              d;
+
+        input_streams = (cl_mem*)malloc(sizeof(cl_mem) * num_input_streams);
+        flags = (cl_mem_flags)(CL_MEM_READ_WRITE);
+
+        int i;
+        d = init_genrand( gRandomSeed );
+        for ( i = 0; i < num_input_streams; i++)
+        {
+            input_data[i] = generate_byte_image(img_width, img_height, 4, d);
+            input_streams[i] = create_image_2d(context, flags, &img_format, img_width, img_height, 0, NULL, NULL);
+            if (!input_streams[i])
+            {
+                log_error("create_image_2d failed\n");
+                free_mtdata(d);
+                free(expected_output);
+                free(output_ptr);
+                return -1;
+            }
+
+            size_t origin[3] = {0, 0, 0}, region[3] = {img_width, img_height, 1};
+            err = clEnqueueWriteImage(queue, input_streams[i], CL_TRUE,
+                                      origin, region, 0, 0,
+                                      input_data[i], 0, NULL, NULL);
+            if (err)
+            {
+                log_error("clWriteImage failed: %d\n", err);
+                free_mtdata(d);
+                free(expected_output);
+                free(output_ptr);
+                free(input_streams);
+                return -1;
+            }
+        }
+        free_mtdata(d); d = NULL;
+        expected_output = generate_expected_byte_image(input_data, num_input_streams, img_width, img_height, 4);
+        for ( i = 0; i < num_input_streams; i++)
+        {
+            free(input_data[i]);
+        }
+        free( input_data );
+    }
+
+    // Set up the kernels.
+    {
+        cl_program          program[2];
+
+        err = create_single_kernel_helper(context, &program[0], &kernel[0], 1, &image_to_image_kernel_integer_coord_code, "image_to_image_copy");
+        if (err)
+        {
+            log_error("Failed to create kernel 0: %d\n", err);
+            return -1;
+        }
+        err = create_single_kernel_helper(context, &program[1], &kernel[1], 1, &image_sum_kernel_integer_coord_code, "image_sum");
+        if (err)
+        {
+            log_error("Failed to create kernel 1: %d\n", err);
+            return -1;
+        }
+        clReleaseProgram(program[0]);
+        clReleaseProgram(program[1]);
+    }
+
+    cl_sampler sampler = clCreateSampler(context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err);
+    test_error(err, "clCreateSampler failed");
+
+    {
+        size_t        threads[3] = {0, 0, 0};
+        threads[0] = (size_t)img_width;
+        threads[1] = (size_t)img_height;
+        int i;
+
+        {
+            cl_mem accum_input;
+            cl_mem accum_output;
+
+            err = clSetKernelArg(kernel[0], 0, sizeof input_streams[0], &input_streams[0]);
+            err |= clSetKernelArg(kernel[0], 1, sizeof accum_streams[0], &accum_streams[0]);
+            err |= clSetKernelArg(kernel[0], 2, sizeof sampler, &sampler);
+            if (err != CL_SUCCESS)
+            {
+                log_error("clSetKernelArgs failed\n");
+                return -1;
+            }
+            err = clEnqueueNDRangeKernel( queue, kernel[0], 2, NULL, threads, NULL, 0, NULL, NULL );
+            if (err != CL_SUCCESS)
+            {
+                log_error("clEnqueueNDRangeKernel failed\n");
+                return -1;
+            }
+
+            for (i = 1; i < num_input_streams; i++)
+            {
+                accum_input = accum_streams[(i-1)%2];
+                accum_output = accum_streams[i%2];
+
+                err = clSetKernelArg(kernel[1], 0, sizeof accum_input, &accum_input);
+                err |= clSetKernelArg(kernel[1], 1, sizeof input_streams[i], &input_streams[i]);
+                err |= clSetKernelArg(kernel[1], 2, sizeof accum_output, &accum_output);
+                err |= clSetKernelArg(kernel[1], 3, sizeof sampler, &sampler);
+                if (err != CL_SUCCESS)
+                {
+                    log_error("clSetKernelArgs failed\n");
+                    return -1;
+                }
+                err = clEnqueueNDRangeKernel( queue, kernel[1], 2, NULL, threads, NULL, 0, NULL, NULL );
+                if (err != CL_SUCCESS)
+                {
+                    log_error("clEnqueueNDRangeKernel failed\n");
+                    return -1;
+                }
+            }
+
+            // Copy the last accum into the other one.
+            accum_input = accum_streams[(i-1)%2];
+            accum_output = accum_streams[i%2];
+            err = clSetKernelArg(kernel[0], 0, sizeof accum_input, &accum_input);
+            err |= clSetKernelArg(kernel[0], 1, sizeof accum_output, &accum_output);
+            if (err != CL_SUCCESS)
+            {
+                log_error("clSetKernelArgs failed\n");
+                return -1;
+            }
+            err = clEnqueueNDRangeKernel( queue, kernel[0], 2, NULL, threads, NULL, 0, NULL, NULL );
+            if (err != CL_SUCCESS)
+            {
+                log_error("clEnqueueNDRangeKernel failed\n");
+                return -1;
+            }
+
+            size_t origin[3] = {0, 0, 0}, region[3] = {img_width, img_height, 1};
+            err = clEnqueueReadImage(queue, accum_output, CL_TRUE,
+                                     origin, region, 0, 0,
+                                     (void *)output_ptr, 0, NULL, NULL);
+            if (err != CL_SUCCESS)
+            {
+                log_error("clReadImage failed\n");
+                return -1;
+            }
+            err = verify_byte_image(expected_output, output_ptr, img_width, img_height, 4);
+            if (err)
+            {
+                log_error("IMAGE_MULTIPASS test failed.\n");
+            }
+            else
+            {
+                log_info("IMAGE_MULTIPASS test passed\n");
+            }
+        }
+
+        clReleaseSampler(sampler);
+    }
+
+    // cleanup
+    clReleaseMemObject(accum_streams[0]);
+    clReleaseMemObject(accum_streams[1]);
+    {
+        int i;
+        for (i = 0; i < num_input_streams; i++)
+        {
+            clReleaseMemObject(input_streams[i]);
+        }
+    }
+    free(input_streams);
+    clReleaseKernel(kernel[0]);
+    clReleaseKernel(kernel[1]);
+    free(expected_output);
+    free(output_ptr);
+
+    return err;
+}
+
+int
+test_image_multipass_float_coord(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+{
+    int                 img_width = 512;
+    int                 img_height = 512;
+    cl_image_format     img_format;
+
+    int                 num_input_streams = 8;
+    cl_mem              *input_streams;
+    cl_mem                accum_streams[2];
+    unsigned char       *expected_output;
+    unsigned char       *output_ptr;
+    cl_kernel           kernel[2];
+    int                 err;
+
+    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
+
+    img_format.image_channel_order = CL_RGBA;
+    img_format.image_channel_data_type = CL_UNORM_INT8;
+
+    output_ptr = (unsigned char*)malloc(sizeof(unsigned char) * 4 * img_width * img_height);
+
+    // Create the accum images with initial data.
+    {
+        unsigned char          *initial_data;
+        cl_mem_flags        flags;
+
+        initial_data = generate_initial_byte_image(img_width, img_height, 4, 0xF0);
+        flags = (cl_mem_flags)(CL_MEM_READ_WRITE);
+
+        accum_streams[0] = create_image_2d(context, flags, &img_format, img_width, img_height, 0, NULL, NULL);
+        if (!accum_streams[0])
+        {
+            log_error("create_image_2d failed\n");
+            return -1;
+        }
+
+        size_t origin[3] = {0, 0, 0}, region[3] = {img_width, img_height, 1};
+        err = clEnqueueWriteImage(queue, accum_streams[0], CL_TRUE,
+                                  origin, region, 0, 0,
+                                  initial_data, 0, NULL, NULL);
+        if (err)
+        {
+            log_error("clWriteImage failed: %d\n", err);
+            return -1;
+        }
+
+        accum_streams[1] = create_image_2d(context, flags, &img_format, img_width, img_height, 0, NULL, NULL);
+        if (!accum_streams[1])
+        {
+            log_error("create_image_2d failed\n");
+            return -1;
+        }
+        err = clEnqueueWriteImage(queue, accum_streams[1], CL_TRUE,
+                                  origin, region, 0, 0,
+                                  initial_data, 0, NULL, NULL);
+        if (err)
+        {
+            log_error("clWriteImage failed: %d\n", err);
+            return -1;
+        }
+
+        free(initial_data);
+    }
+
+    // Set up the input data.
+    {
+        cl_mem_flags        flags;
+        unsigned char       **input_data = (unsigned char **)malloc(sizeof(unsigned char*) * num_input_streams);
+        MTdata              d;
+
+        input_streams = (cl_mem*)malloc(sizeof(cl_mem) * num_input_streams);
+        flags = (cl_mem_flags)(CL_MEM_READ_WRITE);
+
+        int i;
+        d = init_genrand( gRandomSeed );
+        for ( i = 0; i < num_input_streams; i++)
+        {
+            input_data[i] = generate_byte_image(img_width, img_height, 4, d);
+            input_streams[i] = create_image_2d(context, flags, &img_format, img_width, img_height, 0, NULL, NULL);
+            if (!input_streams[i])
+            {
+                log_error("create_image_2d failed\n");
+                free(input_data);
+                free(input_streams);
+                return -1;
+            }
+
+            size_t origin[3] = {0, 0, 0}, region[3] = {img_width, img_height, 1};
+            err = clEnqueueWriteImage(queue, input_streams[i], CL_TRUE,
+                                      origin, region, 0, 0,
+                                      input_data[i], 0, NULL, NULL);
+            if (err)
+            {
+                log_error("clWriteImage failed: %d\n", err);
+                free(input_data);
+                free(input_streams);
+                return -1;
+            }
+        }
+        free_mtdata(d); d = NULL;
+        expected_output = generate_expected_byte_image(input_data, num_input_streams, img_width, img_height, 4);
+        for ( i = 0; i < num_input_streams; i++)
+        {
+            free(input_data[i]);
+        }
+        free(input_data);
+    }
+
+    // Set up the kernels.
+    {
+        cl_program          program[2];
+
+        err = create_single_kernel_helper(context, &program[0], &kernel[0], 1, &image_to_image_kernel_float_coord_code, "image_to_image_copy");
+        if (err)
+        {
+            log_error("Failed to create kernel 2: %d\n", err);
+            return -1;
+        }
+        err = create_single_kernel_helper(context, &program[1], &kernel[1], 1, &image_sum_kernel_float_coord_code, "image_sum");
+        if (err)
+        {
+            log_error("Failed to create kernel 3: %d\n", err);
+            return -1;
+        }
+
+        clReleaseProgram(program[0]);
+        clReleaseProgram(program[1]);
+    }
+
+    cl_sampler sampler = clCreateSampler(context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err);
+    test_error(err, "clCreateSampler failed");
+
+    {
+        size_t        threads[3] = {0, 0, 0};
+        threads[0] = (size_t)img_width;
+        threads[1] = (size_t)img_height;
+        int i;
+
+        {
+            cl_mem accum_input;
+            cl_mem accum_output;
+
+            err = clSetKernelArg(kernel[0], 0, sizeof input_streams[0], &input_streams[0]);
+            err |= clSetKernelArg(kernel[0], 1, sizeof accum_streams[0], &accum_streams[0]);
+            err |= clSetKernelArg(kernel[0], 2, sizeof sampler, &sampler);
+            if (err != CL_SUCCESS)
+            {
+                log_error("clSetKernelArgs failed\n");
+                return -1;
+            }
+            err = clEnqueueNDRangeKernel( queue, kernel[0], 2, NULL, threads, NULL, 0, NULL, NULL );
+            if (err != CL_SUCCESS)
+            {
+                log_error("clEnqueueNDRangeKernel failed\n");
+                return -1;
+            }
+
+            for (i = 1; i < num_input_streams; i++)
+            {
+                accum_input = accum_streams[(i-1)%2];
+                accum_output = accum_streams[i%2];
+
+                err = clSetKernelArg(kernel[1], 0, sizeof accum_input, &accum_input);
+                err |= clSetKernelArg(kernel[1], 1, sizeof input_streams[i], &input_streams[i]);
+                err |= clSetKernelArg(kernel[1], 2, sizeof accum_output, &accum_output);
+                err |= clSetKernelArg(kernel[1], 3, sizeof sampler, &sampler);
+
+                if (err != CL_SUCCESS)
+                {
+                    log_error("clSetKernelArgs failed\n");
+                    return -1;
+                }
+                err = clEnqueueNDRangeKernel( queue, kernel[1], 2, NULL, threads, NULL, 0, NULL, NULL );
+                if (err != CL_SUCCESS)
+                {
+                    log_error("clEnqueueNDRangeKernel failed\n");
+                    return -1;
+                }
+            }
+
+            // Copy the last accum into the other one.
+            accum_input = accum_streams[(i-1)%2];
+            accum_output = accum_streams[i%2];
+            err = clSetKernelArg(kernel[0], 0, sizeof accum_input, &accum_input);
+            err |= clSetKernelArg(kernel[0], 1, sizeof accum_output, &accum_output);
+            if (err != CL_SUCCESS)
+            {
+                log_error("clSetKernelArgs failed\n");
+                return -1;
+            }
+            err = clEnqueueNDRangeKernel( queue, kernel[0], 2, NULL, threads, NULL, 0, NULL, NULL );
+            if (err != CL_SUCCESS)
+            {
+                log_error("clEnqueueNDRangeKernel failed\n");
+                return -1;
+            }
+
+            size_t origin[3] = {0, 0, 0}, region[3] = {img_width, img_height, 1};
+            err = clEnqueueReadImage(queue, accum_output, CL_TRUE,
+                                     origin, region, 0, 0,
+                                     (void *)output_ptr, 0, NULL, NULL);
+            if (err != CL_SUCCESS)
+            {
+                log_error("clReadImage failed\n");
+                return -1;
+            }
+            err = verify_byte_image(expected_output, output_ptr, img_width, img_height, 4);
+            if (err)
+            {
+                log_error("IMAGE_MULTIPASS test failed.\n");
+            }
+            else
+            {
+                log_info("IMAGE_MULTIPASS test passed\n");
+            }
+        }
+
+    }
+
+
+    // cleanup
+    clReleaseSampler(sampler);
+    clReleaseMemObject(accum_streams[0]);
+    clReleaseMemObject(accum_streams[1]);
+    {
+        int i;
+        for (i = 0; i < num_input_streams; i++)
+        {
+            clReleaseMemObject(input_streams[i]);
+        }
+    }
+    clReleaseKernel(kernel[0]);
+    clReleaseKernel(kernel[1]);
+    free(expected_output);
+    free(output_ptr);
+    free(input_streams);
+
+    return err;
+}
+
+
+
+
+
--- a/test_conformance/compatibility/test_conformance/basic/test_image_param.c
+++ b/test_conformance/compatibility/test_conformance/basic/test_image_param.c
@@ -0,0 +1,251 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../../test_common/harness/compat.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+
+#include "procs.h"
+#include "../../test_common/harness/typeWrappers.h"
+#include "../../test_common/harness/imageHelpers.h"
+#include "../../test_common/harness/conversions.h"
+
+
+// OpenCL C source for the kernel used by test_image_param. Each work-item reads
+// the pixel at integer coordinates (tid_x, tid_y) from srcimg through the
+// supplied sampler with read_imagef, and stores the resulting float4 at
+// results[ tid_y * get_image_width( srcimg ) + tid_x ].
+static const char *param_kernel[] = {
+"__kernel void test_fn(read_only image2d_t srcimg, sampler_t sampler, __global float4 *results )\n"
+"{\n"
+"    int            tid_x = get_global_id(0);\n"
+"    int            tid_y = get_global_id(1);\n"
+"    results[ tid_y * get_image_width( srcimg ) + tid_x ] = read_imagef(srcimg, sampler, (int2)(tid_x, tid_y));\n"
+"\n"
+"}\n" };
+
+/**
+ * Compare the float4 samples produced by the kernel against the values
+ * expected from the raw host data that was used to fill the image.
+ *
+ * @param width          image width in pixels
+ * @param height         image height in pixels
+ * @param format         channel order and channel data type of the image;
+ *                       four channels per pixel are consumed
+ * @param inputData      raw host pixel data, width * height pixels
+ * @param actualResults  kernel output, four cl_float values per pixel
+ * @return 0 when every pixel matches within the per-type tolerance, -1 on the
+ *         first mismatch or when the channel data type is not handled here.
+ */
+int validate_results( size_t width, size_t height, cl_image_format &format, char *inputData, cl_float *actualResults )
+{
+    const size_t pixelCount = width * height;
+
+    for( size_t i = 0; i < pixelCount; i++ )
+    {
+        cl_float expected[ 4 ] = { 0.f, 0.f, 0.f, 0.f };
+        cl_float tolerance = 0.f;
+
+        // Convert the four raw channel values of this pixel to the float
+        // values the kernel is expected to have produced.
+        switch( format.image_channel_data_type )
+        {
+            case CL_UNORM_INT8:
+            {
+                const cl_uchar *p = (const cl_uchar *)inputData;
+                for( int c = 0; c < 4; c++ )
+                    expected[ c ] = p[ c ] / 255.f;
+                tolerance = 1.f / 255.f;
+                break;
+            }
+            case CL_SNORM_INT8:
+            {
+                const cl_char *p = (const cl_char *)inputData;
+                // -128 / 127 falls just below -1, so clamp to -1.
+                for( int c = 0; c < 4; c++ )
+                    expected[ c ] = fmaxf( p[ c ] / 127.f, -1.f );
+                tolerance = 1.f / 127.f;
+                break;
+            }
+            case CL_UNSIGNED_INT8:
+            {
+                const cl_uchar *p = (const cl_uchar *)inputData;
+                for( int c = 0; c < 4; c++ )
+                    expected[ c ] = p[ c ];
+                tolerance = 1.f / 127.f;
+                break;
+            }
+            case CL_SIGNED_INT8:
+            {
+                // Each channel is one signed byte; reading through a 16-bit
+                // pointer would merge neighbouring channels and run past the
+                // end of the pixel.
+                const cl_char *p = (const cl_char *)inputData;
+                for( int c = 0; c < 4; c++ )
+                    expected[ c ] = p[ c ];
+                tolerance = 1.f / 127.f;
+                break;
+            }
+            case CL_UNORM_INT16:
+            {
+                const cl_ushort *p = (const cl_ushort *)inputData;
+                for( int c = 0; c < 4; c++ )
+                    expected[ c ] = p[ c ] / 65535.f;
+                tolerance = 1.f / 65535.f;
+                break;
+            }
+            case CL_UNSIGNED_INT32:
+            {
+                const cl_uint *p = (const cl_uint *)inputData;
+                for( int c = 0; c < 4; c++ )
+                    expected[ c ] = p[ c ];
+                tolerance = 0.0001f;
+                break;
+            }
+            case CL_FLOAT:
+            {
+                const cl_float *p = (const cl_float *)inputData;
+                for( int c = 0; c < 4; c++ )
+                    expected[ c ] = p[ c ];
+                tolerance = 0.0001f;
+                break;
+            }
+            default:
+                // No conversion rule for this type: fail explicitly rather
+                // than compare against indeterminate values.
+                log_error( "ERROR: validate_results does not handle channel data type 0x%x\n",
+                           (unsigned int)format.image_channel_data_type );
+                return -1;
+        }
+
+        // Channels 0 and 2 of the host data are exchanged for CL_BGRA images
+        // before comparing against the kernel's output.
+        if( format.image_channel_order == CL_BGRA )
+        {
+            cl_float tmp = expected[ 0 ];
+            expected[ 0 ] = expected[ 2 ];
+            expected[ 2 ] = tmp;
+        }
+
+        // Within an error tolerance, make sure the results match
+        cl_float error1 = fabsf( expected[ 0 ] - actualResults[ 0 ] );
+        cl_float error2 = fabsf( expected[ 1 ] - actualResults[ 1 ] );
+        cl_float error3 = fabsf( expected[ 2 ] - actualResults[ 2 ] );
+        cl_float error4 = fabsf( expected[ 3 ] - actualResults[ 3 ] );
+
+        if( error1 > tolerance || error2 > tolerance || error3 > tolerance || error4 > tolerance )
+        {
+            log_error( "ERROR: Sample %d did not validate against expected results for %d x %d %s:%s image\n", (int)i, (int)width, (int)height,
+                            GetChannelOrderName( format.image_channel_order ), GetChannelTypeName( format.image_channel_data_type ) );
+            log_error( "    Expected: %f %f %f %f\n", (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ] );
+            log_error( "      Actual: %f %f %f %f\n", (float)actualResults[ 0 ], (float)actualResults[ 1 ], (float)actualResults[ 2 ], (float)actualResults[ 3 ] );
+
+            // Diagnostic only: check whether the actual values match the
+            // expected ones with the channels in reversed order (3,2,1,0).
+            cl_float reversed1 = fabsf( expected[ 3 ] - actualResults[ 0 ] );
+            cl_float reversed2 = fabsf( expected[ 2 ] - actualResults[ 1 ] );
+            cl_float reversed3 = fabsf( expected[ 1 ] - actualResults[ 2 ] );
+            cl_float reversed4 = fabsf( expected[ 0 ] - actualResults[ 3 ] );
+            if( reversed1 <= tolerance && reversed2 <= tolerance && reversed3 <= tolerance && reversed4 <= tolerance )
+            {
+                log_error( "\t(Kernel did not respect change in channel order)\n" );
+            }
+            return -1;
+        }
+
+        // Advance to the next pixel in both the results and the host data
+        actualResults += 4;
+        inputData += get_format_type_size( &format ) * 4;
+    }
+
+    return 0;
+}
+
+/**
+ * Runs one kernel against 2D images of several sizes and formats, changing
+ * only the image (and output buffer) kernel arguments between launches, and
+ * checks that every launch read the image it was given.
+ *
+ * All launches are enqueued first; results are read back and validated
+ * afterwards with validate_results().
+ *
+ * @param device        device under test; the test is skipped via
+ *                      PASSIVE_REQUIRE_IMAGE_SUPPORT when images are unsupported
+ * @param context       context used to create images, buffers and the kernel
+ * @param queue         command queue the kernels and reads are enqueued on
+ * @param num_elements  unused by this test
+ * @return 0 on success, a non-zero value on the first failure.
+ */
+int test_image_param(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+{
+    size_t              sizes[] = { 64, 100, 128, 250, 512 };
+    cl_image_format      formats[] = { { CL_RGBA, CL_UNORM_INT8 }, { CL_RGBA, CL_UNORM_INT16 }, { CL_RGBA, CL_FLOAT }, { CL_BGRA, CL_UNORM_INT8 } };
+    ExplicitType      types[] =  { kUChar, kUShort, kFloat, kUChar };
+    int               error;
+    size_t            i, j, idx;
+    size_t            threads[ 2 ];
+    MTdata            d;
+
+    const size_t numSizes = sizeof( sizes ) / sizeof( sizes[ 0 ] );
+    const size_t numFormats = sizeof( formats ) / sizeof( formats[ 0 ] );
+    const size_t numAttempts = numSizes * numFormats;
+
+
+    clProgramWrapper program;
+    clKernelWrapper kernel;
+    clMemWrapper streams[ numAttempts ][ 2 ];
+    BufferOwningPtr<char> inputs[ numAttempts ];
+
+    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
+
+    // Generate all host data up front, in the same (size, format) order the
+    // images are created in, and release the generator before any CL call
+    // that can fail. This keeps early error returns from leaking it.
+    d = init_genrand( gRandomSeed );
+    for( i = 0, idx = 0; i < numSizes; i++ )
+    {
+        for( j = 0; j < numFormats; j++, idx++ )
+        {
+            inputs[ idx ].reset(create_random_data( types[ j ], d, sizes[ i ] * sizes[ i ] * 4 ));
+        }
+    }
+    free_mtdata(d); d = NULL;
+
+    for( i = 0, idx = 0; i < numSizes; i++ )
+    {
+        for( j = 0; j < numFormats; j++, idx++ )
+        {
+            // For each attempt, we create a pair: an input image, whose parameters keep changing, and an output buffer
+            // that we can read values from. The output buffer will remain consistent to ensure that any changes we
+            // witness are due to the image changes
+            streams[ idx ][ 0 ] = create_image_2d( context, CL_MEM_COPY_HOST_PTR, &formats[ j ], sizes[ i ], sizes[ i ], 0, inputs[ idx ], &error );
+            if( error != CL_SUCCESS )
+            {
+                // Only build the message when it is needed.
+                char err_str[256];
+                snprintf(err_str, sizeof( err_str ), "Unable to create input image for order %s type %s" ,
+                                  GetChannelOrderName( formats[j].image_channel_order ),
+                                  GetChannelTypeName( formats[j].image_channel_data_type ));
+                test_error( error, err_str);
+            }
+
+            streams[ idx ][ 1 ] = clCreateBuffer( context, CL_MEM_READ_WRITE, sizes[ i ] * sizes[ i ] * 4 * sizeof( cl_float ), NULL, &error );
+            test_error( error, "Unable to create output buffer" );
+        }
+    }
+
+    // Create a single kernel to use for all the tests
+    error = create_single_kernel_helper( context, &program, &kernel, 1, param_kernel, "test_fn" );
+    test_error( error, "Unable to create testing kernel" );
+
+    // Also create a sampler to use for all the runs
+    clSamplerWrapper sampler = clCreateSampler(context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &error );
+    test_error( error, "clCreateSampler failed" );
+
+    // Set up the arguments for each and queue
+    for( i = 0, idx = 0; i < numSizes; i++ )
+    {
+        for( j = 0; j < numFormats; j++, idx++ )
+        {
+            error = clSetKernelArg( kernel, 0, sizeof( streams[ idx ][ 0 ] ), &streams[ idx ][ 0 ] );
+            error |= clSetKernelArg( kernel, 1, sizeof( sampler ), &sampler );
+            error |= clSetKernelArg( kernel, 2, sizeof( streams[ idx ][ 1 ] ), &streams[ idx ][ 1 ]);
+            test_error( error, "Unable to set kernel arguments" );
+
+            // One work-item per pixel of the square image
+            threads[ 0 ] = threads[ 1 ] = (size_t)sizes[ i ];
+
+            error = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL );
+            test_error( error, "clEnqueueNDRangeKernel failed" );
+        }
+    }
+
+    // Now go through each combo and validate the results
+    for( i = 0, idx = 0; i < numSizes; i++ )
+    {
+        for( j = 0; j < numFormats; j++, idx++ )
+        {
+            const size_t outputBytes = sizes[ i ] * sizes[ i ] * 4 * sizeof( cl_float );
+
+            void *rawOutput = malloc( outputBytes );
+            if( rawOutput == NULL )
+            {
+                log_error( "ERROR: Unable to allocate %d bytes for the result buffer\n", (int)outputBytes );
+                return -1;
+            }
+            BufferOwningPtr<cl_float> output( rawOutput );
+
+            // Blocking read, so the data is ready for validation on return
+            error = clEnqueueReadBuffer( queue, streams[ idx ][ 1 ], CL_TRUE, 0, outputBytes, output, 0, NULL, NULL );
+            test_error( error, "Unable to read results" );
+
+            error = validate_results( sizes[ i ], sizes[ i ], formats[ j ], inputs[ idx ], output );
+            if( error )
+                return -1;
+        }
+    }
+
+    return 0;
+}
--- a/Show More
+++ b/Show More